diff --git a/compiled/labs/scanpy/scanpy_07_trajectory.ipynb b/compiled/labs/scanpy/scanpy_07_trajectory.ipynb deleted file mode 100644 index 6d6cb929..00000000 --- a/compiled/labs/scanpy/scanpy_07_trajectory.ipynb +++ /dev/null @@ -1,597 +0,0 @@ -{ - "cells": [ - { - "cell_type": "raw", - "metadata": {}, - "source": [ - "---\n", - "description: Reconstructing developmental or differentiation pathways\n", - "subtitle: Scanpy Toolkit\n", - "title: Trajectory inference using PAGA\n", - "---" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "
\n", - "\n", - "> **Note**\n", - ">\n", - "> Code chunks run Python commands unless it starts with `%%bash`, in\n", - "> which case, those chunks run shell commands.\n", - "\n", - "
\n", - "\n", - "Partly following [this\n", - "tutorial](https://scanpy-tutorials.readthedocs.io/en/latest/paga-paul15.html).\n", - "\n", - "## Loading libraries" - ] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as pl\n", - "from matplotlib import rcParams\n", - "import scanpy as sc\n", - "\n", - "import scipy\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import warnings\n", - "\n", - "warnings.simplefilter(action=\"ignore\", category=Warning)\n", - "\n", - "# verbosity: errors (0), warnings (1), info (2), hints (3)\n", - "sc.settings.verbosity = 3\n", - "sc.settings.set_figure_params(dpi=100, frameon=False, figsize=(5, 5), facecolor='white', color_map = 'viridis_r') " - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Loading data\n", - "\n", - "In order to speed up the computations during the exercises, we will be\n", - "using a subset of a bone marrow dataset (originally containing about\n", - "100K cells). The bone marrow is the source of adult immune cells, and\n", - "contains virtually all differentiation stages of cell from the immune\n", - "system which later circulate in the blood to all other organs.\n", - "\n", - "![](../figs/hematopoiesis.png)\n", - "\n", - "All the data has been preprocessed with Seurat. The file\n", - "trajectory_scanpy_filtered.h5ad was converted from the Seurat object\n", - "using the SeuratDisk package. For more information on how it was done,\n", - "have a look at the script: convert_to_h5ad.R in the github repo." 
- ] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "import os\n", - "\n", - "path_data = \"https://export.uppmax.uu.se/naiss2023-23-3/workshops/workshop-scrnaseq\"\n", - "\n", - "path_trajectory = \"./data/trajectory\"\n", - "if not os.path.exists(path_trajectory):\n", - " os.makedirs(path_trajectory, exist_ok=True)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "import urllib.request\n", - "\n", - "path_results = \"data/trajectory\"\n", - "if not os.path.exists(path_results):\n", - " os.makedirs(path_results, exist_ok=True)\n", - "\n", - "path_file = \"data/trajectory/trajectory_seurat_filtered.h5ad\"\n", - "if not os.path.exists(path_file):\n", - " file_url = os.path.join(\n", - " path_data, \"trajectory/trajectory_seurat_filtered.h5ad\")\n", - " urllib.request.urlretrieve(file_url, path_file)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Check that the variable names are correct." - ] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "adata = sc.read_h5ad(\"data/trajectory/trajectory_seurat_filtered.h5ad\")\n", - "adata.var" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "# check what you have in the X matrix, should be lognormalized counts.\n", - "print(adata.X[:10,:10])" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Explore the data\n", - "\n", - "There is a umap and clusters provided with the object, first plot some\n", - "information from the previous analysis onto the umap." 
- ] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "sc.pl.umap(adata, color = ['clusters','dataset','batches','Phase'],legend_loc = 'on data', legend_fontsize = 'xx-small', ncols = 2)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is crucial that you performing analysis of a dataset understands what\n", - "is going on, what are the clusters you see in your data and most\n", - "importantly How are the clusters related to each other?. Well, let's\n", - "explore the data a bit. With the help of this table, write down which\n", - "cluster numbers in your dataset express these key markers.\n", - "\n", - " Marker Cell Type\n", - " --------- ----------------------------\n", - " Cd34 HSC progenitor\n", - " Ms4a1 B cell lineage\n", - " Cd3e T cell lineage\n", - " Ltf Granulocyte lineage\n", - " Cst3 Monocyte lineage\n", - " Mcpt8 Mast Cell lineage\n", - " Alas2 RBC lineage\n", - " Siglech Dendritic cell lineage\n", - " C1qc Macrophage cell lineage\n", - " Pf4 Megakaryocyte cell lineage" - ] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "markers = [\"Cd34\",\"Alas2\",\"Pf4\",\"Mcpt8\",\"Ltf\",\"Cst3\", \"Siglech\", \"C1qc\", \"Ms4a1\", \"Cd3e\", ]\n", - "sc.pl.umap(adata, color = markers, use_raw = False, ncols = 4)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Rerun analysis in Scanpy\n", - "\n", - "Redo clustering and umap using the basic Scanpy pipeline. Use the\n", - "provided \"X_harmony_Phase\" dimensionality reduction as the staring\n", - "point." 
- ] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "# first, store the old umap with a new name so it is not overwritten\n", - "adata.obsm['X_umap_old'] = adata.obsm['X_umap']\n", - "\n", - "sc.pp.neighbors(adata, n_pcs = 30, n_neighbors = 20, use_rep=\"X_harmony_Phase\")\n", - "sc.tl.umap(adata, min_dist=0.4, spread=3)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "#sc.tl.umap(adata, min_dist=0.6, spread=1.5)\n", - "sc.pl.umap(adata, color = ['clusters'],legend_loc = 'on data', legend_fontsize = 'xx-small', edges = True)\n", - "\n", - "sc.pl.umap(adata, color = markers, use_raw = False, ncols = 4)\n", - "\n", - "# Redo clustering as well\n", - "sc.tl.leiden(adata, key_added = \"leiden_1.0\", resolution = 1.0) # default resolution in 1.0\n", - "sc.tl.leiden(adata, key_added = \"leiden_1.2\", resolution = 1.2) # default resolution in 1.0\n", - "sc.tl.leiden(adata, key_added = \"leiden_1.4\", resolution = 1.4) # default resolution in 1.0\n", - "\n", - "#sc.tl.louvain(adata, key_added = \"leiden_1.0\") # default resolution in 1.0\n", - "sc.pl.umap(adata, color = ['leiden_1.0', 'leiden_1.2', 'leiden_1.4','clusters'],legend_loc = 'on data', legend_fontsize = 'xx-small', ncols =2)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "#Rename clusters with really clear markers, the rest are left unlabelled.\n", - "\n", - "annot = pd.DataFrame(adata.obs['leiden_1.4'].astype('string'))\n", - "annot[annot['leiden_1.4'] == '10'] = '10_megakaryo' #Pf4\n", - "annot[annot['leiden_1.4'] == '17'] = '17_macro' #C1qc\n", - "annot[annot['leiden_1.4'] == '11'] = '11_eryth' #Alas2\n", - "annot[annot['leiden_1.4'] == '18'] = '18_dend' #Siglech\n", - "annot[annot['leiden_1.4'] == '13'] = '13_mast' #Mcpt8\n", - "annot[annot['leiden_1.4'] == '0'] = '0_mono' #Cts3\n", - "annot[annot['leiden_1.4'] == '1'] = '1_gran' #Ltf\n", - 
"annot[annot['leiden_1.4'] == '9'] = '9_gran'\n", - "annot[annot['leiden_1.4'] == '14'] = '14_TC' #Cd3e\n", - "annot[annot['leiden_1.4'] == '16'] = '16_BC' #Ms4a1\n", - "annot[annot['leiden_1.4'] == '8'] = '8_progen' # Cd34\n", - "annot[annot['leiden_1.4'] == '4'] = '4_progen' \n", - "annot[annot['leiden_1.4'] == '5'] = '5_progen'\n", - "\n", - "adata.obs['annot']=annot['leiden_1.4'].astype('category')\n", - "\n", - "sc.pl.umap(adata, color = 'annot',legend_loc = 'on data', legend_fontsize = 'xx-small', ncols =2)\n", - "\n", - "annot.value_counts()\n", - "#type(annot)\n", - "\n", - "# astype('category')" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "# plot onto the Seurat embedding:\n", - "sc.pl.embedding(adata, basis='X_umap_old', color = 'annot',legend_loc = 'on data', legend_fontsize = 'xx-small', ncols =2)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Run PAGA\n", - "\n", - "Use the clusters from leiden clustering with leiden_1.4 and run PAGA.\n", - "First we create the graph and initialize the positions using the umap." - ] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "# use the umap to initialize the graph layout.\n", - "sc.tl.draw_graph(adata, init_pos='X_umap')\n", - "sc.pl.draw_graph(adata, color='annot', legend_loc='on data', legend_fontsize = 'xx-small')\n", - "sc.tl.paga(adata, groups='annot')\n", - "sc.pl.paga(adata, color='annot', edge_width_scale = 0.3)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As you can see, we have edges between many clusters that we know are are\n", - "unrelated, so we may need to clean up the data a bit more.\n", - "\n", - "## Data pre-processing prior trajectory inference\n", - "\n", - "First, lets explore the graph a bit. So we plot the umap with the graph\n", - "connections on top." 
- ] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "sc.pl.umap(adata, edges=True, color = 'annot', legend_loc= 'on data', legend_fontsize= 'xx-small')" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We have many edges in the graph between unrelated clusters, so lets try\n", - "with fewer neighbors." - ] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "sc.pp.neighbors(adata, n_neighbors=5, use_rep = 'X_harmony_Phase', n_pcs = 30)\n", - "sc.pl.umap(adata, edges=True, color = 'annot', legend_loc= 'on data', legend_fontsize= 'xx-small')" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Rerun PAGA again on the data" - ] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "sc.tl.draw_graph(adata, init_pos='X_umap')\n", - "sc.pl.draw_graph(adata, color='annot', legend_loc='on data', legend_fontsize = 'xx-small')" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "sc.tl.paga(adata, groups='annot')\n", - "sc.pl.paga(adata, color='annot', edge_width_scale = 0.3)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Recomputing the embedding using PAGA-initialization\n", - "\n", - "The following is just as well possible for a UMAP." - ] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "sc.tl.draw_graph(adata, init_pos='paga')" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we can see all marker genes also at single-cell resolution in a\n", - "meaningful layout." 
- ] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "sc.pl.draw_graph(adata, color=['annot'], legend_loc='on data', legend_fontsize= 'xx-small')" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Compare the 2 graphs" - ] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "sc.pl.paga_compare(\n", - " adata, threshold=0.03, title='', right_margin=0.2, size=10, edge_width_scale=0.5,\n", - " legend_fontsize=12, fontsize=12, frameon=False, edges=True)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Reconstructing gene changes along PAGA paths for a given set of genes\n", - "\n", - "Choose a root cell for diffusion pseudotime. We have 3 progenitor\n", - "clusters, but cluster 5 seems the most clear." - ] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "adata.uns['iroot'] = np.flatnonzero(adata.obs['annot'] == '5_progen')[0]\n", - "\n", - "sc.tl.dpt(adata)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Use the full raw data for visualization." - ] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "sc.pl.draw_graph(adata, color=['annot', 'dpt_pseudotime'], legend_loc='on data', legend_fontsize= 'x-small')" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "By looking at the different know lineages and the layout of the graph we\n", - "define manually some paths to the graph that corresponds to spcific\n", - "lineages." 
- ] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "# Define paths\n", - "\n", - "paths = [('erythrocytes', ['5_progen', '8_progen', '6', '3', '7', '11_eryth']),\n", - " ('lympoid', ['5_progen', '12', '16_BC', '14_TC']),\n", - " ('granulo', ['5_progen', '4_progen', '2', '9_gran', '1_gran']),\n", - " ('mono', ['5_progen', '4_progen', '0_mono', '18_dend', '17_macro'])\n", - " ]\n", - "\n", - "adata.obs['distance'] = adata.obs['dpt_pseudotime']" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Then we select some genes that can vary in the lineages and plot onto\n", - "the paths." - ] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "gene_names = ['Gata2', 'Gata1', 'Klf1', 'Epor', 'Hba-a2', # erythroid\n", - " 'Elane', 'Cebpe', 'Gfi1', # neutrophil\n", - " 'Irf8', 'Csf1r', 'Ctsg', # monocyte\n", - " 'Itga2b','Prss34','Cma1','Procr', # Megakaryo,Basophil,Mast,HPC\n", - " 'C1qc','Siglech','Ms4a1','Cd3e','Cd34']" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "_, axs = pl.subplots(ncols=4, figsize=(10, 4), gridspec_kw={\n", - " 'wspace': 0.05, 'left': 0.12})\n", - "pl.subplots_adjust(left=0.05, right=0.98, top=0.82, bottom=0.2)\n", - "for ipath, (descr, path) in enumerate(paths):\n", - " _, data = sc.pl.paga_path(\n", - " adata, path, gene_names,\n", - " show_node_names=False,\n", - " ax=axs[ipath],\n", - " ytick_fontsize=12,\n", - " left_margin=0.15,\n", - " n_avg=50,\n", - " annotations=['distance'],\n", - " show_yticks=True if ipath == 0 else False,\n", - " show_colorbar=False,\n", - " color_map='Greys',\n", - " groups_key='annot',\n", - " color_maps_annotations={'distance': 'viridis'},\n", - " title='{} path'.format(descr),\n", - " return_data=True,\n", - " use_raw=False,\n", - " show=False)\n", - " data.to_csv('data/trajectory/paga_path_{}.csv'.format(descr))\n", - 
"pl.savefig('data/trajectory/paga_path.pdf')\n", - "pl.show()" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "
\n", - "\n", - "> **Discuss**\n", - ">\n", - "> As you can see, we can manipulate the trajectory quite a bit by\n", - "> selecting different number of neighbors, components etc. to fit with\n", - "> our assumptions on the development of these celltypes.\n", - ">\n", - "> Please explore further how you can tweak the trajectory. For instance,\n", - "> can you create a PAGA trajectory using the orignial umap from Seurat\n", - "> instead? Hint, you first need to compute the neighbors on the umap.\n", - "\n", - "
\n", - "\n", - "## Session info\n", - "\n", - "```{=html}\n", - "
\n", - "```\n", - "```{=html}\n", - "\n", - "```\n", - "Click here\n", - "```{=html}\n", - "\n", - "```" - ] - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "sc.logging.print_versions()" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```{=html}\n", - "
\n", - "```" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file diff --git a/docs/labs/scanpy/scanpy_07_trajectory.html b/docs/labs/scanpy/scanpy_07_trajectory.html deleted file mode 100644 index 6450ff39..00000000 --- a/docs/labs/scanpy/scanpy_07_trajectory.html +++ /dev/null @@ -1,1257 +0,0 @@ - - - - - - - - - - - - - - - Trajectory inference using PAGA - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- -
- -
-
-
-

Trajectory inference using PAGA

-

Scanpy Toolkit

-
-
- Reconstructing developmental or differentiation pathways from individual cell gene expression profiles to understand cellular transitions and relationships. -
-
-
-
- - -
- -
-
Authors
-
-

Åsa Björklund

-

Paulo Czarnewski

-

Susanne Reinsbach

-

Roy Francis

-
-
- -
-
Published
-
-

16-Jan-2024

-
-
- - -
- - -
- - - - -
- - - - - -
-
-
- -
-
-Note -
-
-
-

Code chunks run Python commands unless they start with %%bash, in which case they run shell commands.

-
-
-

Partly following this tutorial.

-
-

1 Loading libraries

-
-
import numpy as np
-import pandas as pd
-import matplotlib.pyplot as pl
-from matplotlib import rcParams
-import scanpy as sc
-
-import scipy
-import numpy as np
-import matplotlib.pyplot as plt
-import warnings
-
-warnings.simplefilter(action="ignore", category=Warning)
-
-# verbosity: errors (0), warnings (1), info (2), hints (3)
-sc.settings.verbosity = 3
-sc.settings.set_figure_params(dpi=100, frameon=False, figsize=(5, 5), facecolor='white', color_map = 'viridis_r') 
-
-
-
-

2 Loading data

-

In order to speed up the computations during the exercises, we will be using a subset of a bone marrow dataset (originally containing about 100K cells). The bone marrow is the source of adult immune cells, and contains virtually all differentiation stages of the cells of the immune system, which later circulate in the blood to all other organs.

-
-
-

-
-
-

All the data has been preprocessed with Seurat. The file trajectory_seurat_filtered.h5ad was converted from the Seurat object using the SeuratDisk package. For more information on how it was done, have a look at the script convert_to_h5ad.R in the GitHub repo.

-
-
import os
-
-path_data = "https://export.uppmax.uu.se/naiss2023-23-3/workshops/workshop-scrnaseq"  # base URL of the workshop data; the download step appends the file's relative path to it
-
-path_trajectory = "./data/trajectory"  # local directory created for this lab's files
-if not os.path.exists(path_trajectory):
-    os.makedirs(path_trajectory, exist_ok=True)
-
-
-
import urllib.request
-
-path_results = "data/trajectory"
-if not os.path.exists(path_results):
-    os.makedirs(path_results, exist_ok=True)
-
-path_file = "data/trajectory/trajectory_seurat_filtered.h5ad"
-if not os.path.exists(path_file):
-    file_url = os.path.join(
-        path_data, "trajectory/trajectory_seurat_filtered.h5ad")
-    urllib.request.urlretrieve(file_url, path_file)
-
-

Check that the variable names are correct.

-
-
adata = sc.read_h5ad("data/trajectory/trajectory_seurat_filtered.h5ad")
-adata.var
-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
features
0610040J01Rik0610040J01Rik
1190007I07Rik1190007I07Rik
1500009L16Rik1500009L16Rik
1700012B09Rik1700012B09Rik
1700020L24Rik1700020L24Rik
......
SqorSqor
Sting1Sting1
Tent5aTent5a
Tlcd4Tlcd4
Znrd2Znrd2
- -

3585 rows × 1 columns

-
-
-
-
-
# check what you have in the X matrix, should be lognormalized counts.
-print(adata.X[:10,:10])
-
-
  (0, 4)    0.11622072805743532
-  (0, 8)    0.4800893970571722
-  (1, 8)    0.2478910541698065
-  (1, 9)    0.17188973970230348
-  (2, 1)    0.09413397843954842
-  (2, 7)    0.18016412971724202
-  (3, 1)    0.08438841021254412
-  (3, 4)    0.08438841021254412
-  (3, 7)    0.08438841021254412
-  (3, 8)    0.3648216463668793
-  (4, 1)    0.14198147850903975
-  (4, 8)    0.14198147850903975
-  (5, 1)    0.17953169693896723
-  (5, 8)    0.17953169693896723
-  (5, 9)    0.17953169693896723
-  (6, 4)    0.2319546390006887
-  (6, 8)    0.42010741700351195
-  (7, 1)    0.1775659421407816
-  (7, 8)    0.39593115482156394
-  (7, 9)    0.09271901219711086
-  (8, 1)    0.12089079757716388
-  (8, 8)    0.22873058755480363
-  (9, 1)    0.08915380247493314
-  (9, 4)    0.08915380247493314
-  (9, 8)    0.38270398718590104
-
-
-
-
-

3 Explore the data

-

A UMAP and clusters are provided with the object. First, plot some information from the previous analysis onto the UMAP.

-
-
sc.pl.umap(adata, color = ['clusters','dataset','batches','Phase'],legend_loc = 'on data', legend_fontsize = 'xx-small', ncols = 2)
-
-
-
-

-
-
-
-
-

It is crucial that you, when performing the analysis of a dataset, understand what is going on: what are the clusters you see in your data and, most importantly, how are the clusters related to each other? Well, let’s explore the data a bit. With the help of this table, write down which cluster numbers in your dataset express these key markers.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
MarkerCell Type
Cd34HSC progenitor
Ms4a1B cell lineage
Cd3eT cell lineage
LtfGranulocyte lineage
Cst3Monocyte lineage
Mcpt8Mast Cell lineage
Alas2RBC lineage
SiglechDendritic cell lineage
C1qcMacrophage cell lineage
Pf4Megakaryocyte cell lineage
-
-
markers = ["Cd34","Alas2","Pf4","Mcpt8","Ltf","Cst3", "Siglech", "C1qc", "Ms4a1", "Cd3e", ]
-sc.pl.umap(adata, color = markers, use_raw = False, ncols = 4)
-
-
-
-

-
-
-
-
-
-
-

4 Rerun analysis in Scanpy

-

Redo clustering and UMAP using the basic Scanpy pipeline. Use the provided “X_harmony_Phase” dimensionality reduction as the starting point.

-
-
# first, store the old umap with a new name so it is not overwritten
-adata.obsm['X_umap_old'] = adata.obsm['X_umap']
-
-sc.pp.neighbors(adata, n_pcs = 30, n_neighbors = 20, use_rep="X_harmony_Phase")
-sc.tl.umap(adata, min_dist=0.4, spread=3)
-
-
computing neighbors
-    finished: added to `.uns['neighbors']`
-    `.obsp['distances']`, distances for each pair of neighbors
-    `.obsp['connectivities']`, weighted adjacency matrix (0:00:08)
-computing UMAP
-    finished: added
-    'X_umap', UMAP coordinates (adata.obsm) (0:00:09)
-
-
-
-
# Alternative UMAP settings, kept for reference: sc.tl.umap(adata, min_dist=0.6, spread=1.5)
-sc.pl.umap(adata, color = ['clusters'],legend_loc = 'on data', legend_fontsize = 'xx-small', edges = True)
-
-sc.pl.umap(adata, color = markers, use_raw = False, ncols = 4)
-
-# Redo the clustering as well, at three Leiden resolutions, each stored under its own key
-sc.tl.leiden(adata, key_added = "leiden_1.0", resolution = 1.0) # 1.0 is the default resolution
-sc.tl.leiden(adata, key_added = "leiden_1.2", resolution = 1.2) # finer than the default
-sc.tl.leiden(adata, key_added = "leiden_1.4", resolution = 1.4) # finest of the three; used for the annotation
-
-# Louvain alternative, kept for reference: sc.tl.louvain(adata, key_added = "leiden_1.0")
-sc.pl.umap(adata, color = ['leiden_1.0', 'leiden_1.2', 'leiden_1.4','clusters'],legend_loc = 'on data', legend_fontsize = 'xx-small', ncols =2)
-
-
-
-

-
-
-
-
-
-
-

-
-
-
-
-
running Leiden clustering
-    finished: found 16 clusters and added
-    'leiden_1.0', the cluster labels (adata.obs, categorical) (0:00:02)
-running Leiden clustering
-    finished: found 17 clusters and added
-    'leiden_1.2', the cluster labels (adata.obs, categorical) (0:00:01)
-running Leiden clustering
-    finished: found 19 clusters and added
-    'leiden_1.4', the cluster labels (adata.obs, categorical) (0:00:02)
-
-
-
-
-

-
-
-
-
-
-
#Rename clusters with really clear markers, the rest are left unlabelled.
-
-annot = pd.DataFrame(adata.obs['leiden_1.4'].astype('string'))
-annot[annot['leiden_1.4'] == '10'] = '10_megakaryo' #Pf4
-annot[annot['leiden_1.4'] == '17'] = '17_macro'  #C1qc
-annot[annot['leiden_1.4'] == '11'] = '11_eryth' #Alas2
-annot[annot['leiden_1.4'] == '18'] = '18_dend' #Siglech
-annot[annot['leiden_1.4'] == '13'] = '13_mast' #Mcpt8
-annot[annot['leiden_1.4'] == '0'] = '0_mono' #Cts3
-annot[annot['leiden_1.4'] == '1'] = '1_gran' #Ltf
-annot[annot['leiden_1.4'] == '9'] = '9_gran'
-annot[annot['leiden_1.4'] == '14'] = '14_TC' #Cd3e
-annot[annot['leiden_1.4'] == '16'] = '16_BC' #Ms4a1
-annot[annot['leiden_1.4'] == '8'] = '8_progen'  # Cd34
-annot[annot['leiden_1.4'] == '4'] = '4_progen' 
-annot[annot['leiden_1.4'] == '5'] = '5_progen'
-
-adata.obs['annot']=annot['leiden_1.4'].astype('category')
-
-sc.pl.umap(adata, color = 'annot',legend_loc = 'on data', legend_fontsize = 'xx-small', ncols =2)
-
-annot.value_counts()
-#type(annot)
-
-# astype('category')
-
-
-
-

-
-
-
-
-
leiden_1.4  
-0_mono          509
-1_gran          487
-2               479
-3               463
-4_progen        387
-5_progen        384
-7               368
-6               368
-8_progen        367
-9_gran          366
-10_megakaryo    301
-11_eryth        294
-12              276
-13_mast         159
-14_TC           151
-15              128
-16_BC           124
-17_macro        116
-18_dend         101
-Name: count, dtype: int64
-
-
-
-
# plot onto the Seurat embedding:
-sc.pl.embedding(adata, basis='X_umap_old', color = 'annot',legend_loc = 'on data', legend_fontsize = 'xx-small', ncols =2)
-
-
-
-

-
-
-
-
-
-
-

5 Run PAGA

-

Use the clusters from leiden clustering with leiden_1.4 and run PAGA. First we create the graph and initialize the positions using the umap.

-
-
# use the umap to initialize the graph layout.
-sc.tl.draw_graph(adata, init_pos='X_umap')
-sc.pl.draw_graph(adata, color='annot', legend_loc='on data', legend_fontsize = 'xx-small')
-sc.tl.paga(adata, groups='annot')
-sc.pl.paga(adata, color='annot', edge_width_scale = 0.3)
-
-
drawing single-cell graph using layout 'fa'
-WARNING: Package 'fa2' is not installed, falling back to layout 'fr'.To use the faster and better ForceAtlas2 layout, install package 'fa2' (`pip install fa2`).
-    finished: added
-    'X_draw_graph_fr', graph_drawing coordinates (adata.obsm) (0:00:03)
-running PAGA
-    finished: added
-    'paga/connectivities', connectivities adjacency (adata.uns)
-    'paga/connectivities_tree', connectivities subtree (adata.uns) (0:00:00)
---> added 'pos', the PAGA positions (adata.uns['paga'])
-
-
-
-
-

-
-
-
-
-
-
-

-
-
-
-
-

As you can see, we have edges between many clusters that we know are unrelated, so we may need to clean up the data a bit more.

-
-
-

6 Data pre-processing prior trajectory inference

-

First, let’s explore the graph a bit by plotting the UMAP with the graph connections on top.

-
-
sc.pl.umap(adata, edges=True, color = 'annot', legend_loc= 'on data', legend_fontsize= 'xx-small')
-
-
-
-

-
-
-
-
-

We have many edges in the graph between unrelated clusters, so let’s try with fewer neighbors.

-
-
sc.pp.neighbors(adata, n_neighbors=5,  use_rep = 'X_harmony_Phase', n_pcs = 30)
-sc.pl.umap(adata, edges=True, color = 'annot', legend_loc= 'on data', legend_fontsize= 'xx-small')
-
-
computing neighbors
-    finished: added to `.uns['neighbors']`
-    `.obsp['distances']`, distances for each pair of neighbors
-    `.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
-
-
-
-
-

-
-
-
-
-
-
-

7 Rerun PAGA again on the data

-
-
sc.tl.draw_graph(adata, init_pos='X_umap')
-sc.pl.draw_graph(adata, color='annot', legend_loc='on data', legend_fontsize = 'xx-small')
-
-
drawing single-cell graph using layout 'fa'
-WARNING: Package 'fa2' is not installed, falling back to layout 'fr'.To use the faster and better ForceAtlas2 layout, install package 'fa2' (`pip install fa2`).
-    finished: added
-    'X_draw_graph_fr', graph_drawing coordinates (adata.obsm) (0:00:02)
-
-
-
-
-

-
-
-
-
-
-
sc.tl.paga(adata, groups='annot')
-sc.pl.paga(adata, color='annot', edge_width_scale = 0.3)
-
-
running PAGA
-    finished: added
-    'paga/connectivities', connectivities adjacency (adata.uns)
-    'paga/connectivities_tree', connectivities subtree (adata.uns) (0:00:00)
---> added 'pos', the PAGA positions (adata.uns['paga'])
-
-
-
-
-

-
-
-
-
-
-
-

8 Recomputing the embedding using PAGA-initialization

-

The following is just as well possible for a UMAP.

-
-
sc.tl.draw_graph(adata, init_pos='paga')
-
-
drawing single-cell graph using layout 'fa'
-WARNING: Package 'fa2' is not installed, falling back to layout 'fr'.To use the faster and better ForceAtlas2 layout, install package 'fa2' (`pip install fa2`).
-    finished: added
-    'X_draw_graph_fr', graph_drawing coordinates (adata.obsm) (0:00:15)
-
-
-

Now we can see all marker genes also at single-cell resolution in a meaningful layout.

-
-
sc.pl.draw_graph(adata, color=['annot'], legend_loc='on data', legend_fontsize=  'xx-small')
-
-
-
-

-
-
-
-
-

Compare the 2 graphs

-
-
sc.pl.paga_compare(
-    adata, threshold=0.03, title='', right_margin=0.2, size=10, edge_width_scale=0.5,
-    legend_fontsize=12, fontsize=12, frameon=False, edges=True)
-
-
--> added 'pos', the PAGA positions (adata.uns['paga'])
-
-
-
-
-

-
-
-
-
-
-
-

9 Reconstructing gene changes along PAGA paths for a given set of genes

-

Choose a root cell for diffusion pseudotime. We have 3 progenitor clusters, but cluster 5 seems the most clear.

-
-
adata.uns['iroot'] = np.flatnonzero(adata.obs['annot']  == '5_progen')[0]
-
-sc.tl.dpt(adata)
-
-
WARNING: Trying to run `tl.dpt` without prior call of `tl.diffmap`. Falling back to `tl.diffmap` with default parameters.
-computing Diffusion Maps using n_comps=15(=n_dcs)
-computing transitions
-    finished (0:00:00)
-    eigenvalues of transition matrix
-    [1.         0.9989591  0.997628   0.9970365  0.9956704  0.99334306
-     0.9918951  0.9915921  0.99013233 0.98801893 0.9870309  0.9861044
-     0.9851118  0.9845008  0.9839531 ]
-    finished: added
-    'X_diffmap', diffmap coordinates (adata.obsm)
-    'diffmap_evals', eigenvalues of transition matrix (adata.uns) (0:00:01)
-computing Diffusion Pseudotime using n_dcs=10
-    finished: added
-    'dpt_pseudotime', the pseudotime (adata.obs) (0:00:00)
-
-
-

Plot the cluster annotation and the diffusion pseudotime on the graph layout.

-
-
sc.pl.draw_graph(adata, color=['annot', 'dpt_pseudotime'], legend_loc='on data', legend_fontsize= 'x-small')
-
-
-
-

-
-
-
-
-

By looking at the different known lineages and the layout of the graph, we manually define some paths through the graph that correspond to specific lineages.

-
-
# Define paths: each entry is (lineage name, ordered list of 'annot' cluster labels to walk through); every path starts at the root cluster '5_progen'
-
-paths = [('erythrocytes', ['5_progen', '8_progen', '6', '3', '7', '11_eryth']),
-         ('lympoid', ['5_progen', '12', '16_BC', '14_TC']),  # NOTE(review): 'lympoid' is presumably a misspelling of 'lymphoid'; it ends up in the plot title and the CSV file name
-         ('granulo', ['5_progen', '4_progen', '2', '9_gran', '1_gran']),
-         ('mono', ['5_progen', '4_progen', '0_mono', '18_dend', '17_macro'])
-         ]
-
-adata.obs['distance'] = adata.obs['dpt_pseudotime']  # copy of the pseudotime; passed to paga_path as the 'distance' annotation
-
-

Then we select some genes that may vary along the lineages and plot them along the paths.

-
-
gene_names = ['Gata2', 'Gata1', 'Klf1', 'Epor', 'Hba-a2',  # erythroid
-              'Elane', 'Cebpe', 'Gfi1',                    # neutrophil
-              'Irf8', 'Csf1r', 'Ctsg',                     # monocyte
-              'Itga2b','Prss34','Cma1','Procr',            # Megakaryo,Basophil,Mast,HPC
-              'C1qc','Siglech','Ms4a1','Cd3e','Cd34']
-
-
-
_, axs = pl.subplots(ncols=4, figsize=(10, 4), gridspec_kw={
-                     'wspace': 0.05, 'left': 0.12})
-pl.subplots_adjust(left=0.05, right=0.98, top=0.82, bottom=0.2)
-for ipath, (descr, path) in enumerate(paths):
-    _, data = sc.pl.paga_path(
-        adata, path, gene_names,
-        show_node_names=False,
-        ax=axs[ipath],
-        ytick_fontsize=12,
-        left_margin=0.15,
-        n_avg=50,
-        annotations=['distance'],
-        show_yticks=True if ipath == 0 else False,
-        show_colorbar=False,
-        color_map='Greys',
-        groups_key='annot',
-        color_maps_annotations={'distance': 'viridis'},
-        title='{} path'.format(descr),
-        return_data=True,
-        use_raw=False,
-        show=False)
-    data.to_csv('data/trajectory/paga_path_{}.csv'.format(descr))
-pl.savefig('data/trajectory/paga_path.pdf')
-pl.show()
-
-
-
-

-
-
-
-
-
-
-
- -
-
-Discuss -
-
-
-

As you can see, we can manipulate the trajectory quite a bit by selecting different numbers of neighbors, components, etc., to fit our assumptions about the development of these cell types.

-

Please explore further how you can tweak the trajectory. For instance, can you create a PAGA trajectory using the original UMAP from Seurat instead? Hint: you first need to compute the neighbors on the UMAP.

-
-
-
-
-

10 Session info

-
- -Click here - -
-
sc.logging.print_versions()
-
-
-----
-anndata     0.10.3
-scanpy      1.9.6
------
-PIL                 10.0.0
-anyio               NA
-asttokens           NA
-attr                23.1.0
-babel               2.12.1
-backcall            0.2.0
-certifi             2023.11.17
-cffi                1.15.1
-charset_normalizer  3.1.0
-colorama            0.4.6
-comm                0.1.3
-cycler              0.12.1
-cython_runtime      NA
-dateutil            2.8.2
-debugpy             1.6.7
-decorator           5.1.1
-defusedxml          0.7.1
-exceptiongroup      1.2.0
-executing           1.2.0
-fastjsonschema      NA
-fontTools           4.47.0
-gmpy2               2.1.2
-h5py                3.9.0
-idna                3.4
-igraph              0.10.8
-ipykernel           6.23.1
-ipython_genutils    0.2.0
-jedi                0.18.2
-jinja2              3.1.2
-joblib              1.3.2
-json5               NA
-jsonpointer         2.0
-jsonschema          4.17.3
-jupyter_events      0.6.3
-jupyter_server      2.6.0
-jupyterlab_server   2.22.1
-kiwisolver          1.4.5
-leidenalg           0.10.1
-llvmlite            0.41.1
-louvain             0.8.1
-markupsafe          2.1.2
-matplotlib          3.8.0
-matplotlib_inline   0.1.6
-mpl_toolkits        NA
-mpmath              1.3.0
-natsort             8.4.0
-nbformat            5.8.0
-networkx            3.2.1
-numba               0.58.1
-numpy               1.26.2
-opt_einsum          v3.3.0
-overrides           NA
-packaging           23.1
-pandas              2.1.4
-parso               0.8.3
-pexpect             4.8.0
-pickleshare         0.7.5
-pkg_resources       NA
-platformdirs        3.5.1
-prometheus_client   NA
-prompt_toolkit      3.0.38
-psutil              5.9.5
-ptyprocess          0.7.0
-pure_eval           0.2.2
-pvectorc            NA
-pycparser           2.21
-pydev_ipython       NA
-pydevconsole        NA
-pydevd              2.9.5
-pydevd_file_utils   NA
-pydevd_plugins      NA
-pydevd_tracing      NA
-pygments            2.15.1
-pynndescent         0.5.11
-pyparsing           3.1.1
-pyrsistent          NA
-pythonjsonlogger    NA
-pytz                2023.3
-requests            2.31.0
-rfc3339_validator   0.1.4
-rfc3986_validator   0.1.1
-scipy               1.11.4
-send2trash          NA
-session_info        1.0.0
-six                 1.16.0
-sklearn             1.3.2
-sniffio             1.3.0
-socks               1.7.1
-sparse              0.14.0
-stack_data          0.6.2
-sympy               1.12
-texttable           1.7.0
-threadpoolctl       3.2.0
-torch               2.0.0
-tornado             6.3.2
-tqdm                4.65.0
-traitlets           5.9.0
-typing_extensions   NA
-umap                0.5.5
-urllib3             2.0.2
-wcwidth             0.2.6
-websocket           1.5.2
-yaml                6.0
-zmq                 25.0.2
-zoneinfo            NA
-zstandard           0.19.0
------
-IPython             8.13.2
-jupyter_client      8.2.0
-jupyter_core        5.3.0
-jupyterlab          4.0.1
-notebook            6.5.4
------
-Python 3.10.11 | packaged by conda-forge | (main, May 10 2023, 18:58:44) [GCC 11.3.0]
-Linux-6.5.11-linuxkit-x86_64-with-glibc2.35
------
-Session information updated at 2024-01-23 11:32
-
-
-
- - -
- -
- -
- - - - - - - \ No newline at end of file diff --git a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-10-output-1.png b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-10-output-1.png index 03d88c61..1cf157c6 100644 Binary files a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-10-output-1.png and b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-10-output-1.png differ diff --git a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-11-output-1.png b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-11-output-1.png index 1cf157c6..4854b07e 100644 Binary files a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-11-output-1.png and b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-11-output-1.png differ diff --git a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-12-output-1.png b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-12-output-1.png deleted file mode 100644 index 4854b07e..00000000 Binary files a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-12-output-1.png and /dev/null differ diff --git a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-13-output-2.png b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-12-output-2.png similarity index 100% rename from docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-13-output-2.png rename to docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-12-output-2.png diff --git a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-13-output-3.png b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-12-output-3.png similarity index 100% rename from docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-13-output-3.png rename to docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-12-output-3.png diff --git a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-14-output-1.png 
b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-13-output-1.png similarity index 100% rename from docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-14-output-1.png rename to docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-13-output-1.png diff --git a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-14-output-2.png b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-14-output-2.png new file mode 100644 index 00000000..be3d90e1 Binary files /dev/null and b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-14-output-2.png differ diff --git a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-15-output-2.png b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-15-output-2.png index be3d90e1..92ca62b9 100644 Binary files a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-15-output-2.png and b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-15-output-2.png differ diff --git a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-16-output-2.png b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-16-output-2.png index 92ca62b9..c133133f 100644 Binary files a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-16-output-2.png and b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-16-output-2.png differ diff --git a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-17-output-2.png b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-17-output-2.png deleted file mode 100644 index c133133f..00000000 Binary files a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-17-output-2.png and /dev/null differ diff --git a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-19-output-1.png b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-18-output-1.png similarity index 100% rename from docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-19-output-1.png rename to 
docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-18-output-1.png diff --git a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-20-output-2.png b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-19-output-2.png similarity index 100% rename from docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-20-output-2.png rename to docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-19-output-2.png diff --git a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-22-output-1.png b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-21-output-1.png similarity index 100% rename from docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-22-output-1.png rename to docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-21-output-1.png diff --git a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-25-output-1.png b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-24-output-1.png similarity index 100% rename from docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-25-output-1.png rename to docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-24-output-1.png diff --git a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-6-output-1.png b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-6-output-1.png new file mode 100644 index 00000000..d77fe270 Binary files /dev/null and b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-6-output-1.png differ diff --git a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-7-output-1.png b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-7-output-1.png index d77fe270..5aea0c6a 100644 Binary files a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-7-output-1.png and b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-7-output-1.png differ diff --git a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-8-output-1.png 
b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-8-output-1.png deleted file mode 100644 index 5aea0c6a..00000000 Binary files a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-8-output-1.png and /dev/null differ diff --git a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-9-output-1.png b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-9-output-1.png new file mode 100644 index 00000000..03d88c61 Binary files /dev/null and b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-9-output-1.png differ diff --git a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-10-output-2.png b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-9-output-2.png similarity index 100% rename from docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-10-output-2.png rename to docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-9-output-2.png diff --git a/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-10-output-4.png b/docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-9-output-4.png similarity index 100% rename from docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-10-output-4.png rename to docs/labs/scanpy/scanpy_07_trajectory_files/figure-html/cell-9-output-4.png diff --git a/docs/labs/scanpy/scanpy_08_spatial.html b/docs/labs/scanpy/scanpy_08_spatial.html index 40f74d85..465586b6 100644 --- a/docs/labs/scanpy/scanpy_08_spatial.html +++ b/docs/labs/scanpy/scanpy_08_spatial.html @@ -207,7 +207,7 @@

Published
-

16-Jan-2024

+

26-Jan-2024

@@ -241,6 +241,8 @@

On this page

  • 6 Deconvolution
  • 7 Session info
  • @@ -286,9 +288,8 @@

    warnings.simplefilter(action="ignore", category=Warning) -#sc.logging.print_versions() # gives errror!! -sc.set_figure_params(facecolor="white", figsize=(8, 8)) -sc.settings.verbosity = 3 +sc.set_figure_params(facecolor="white", figsize=(8, 8)) +sc.settings.verbosity = 3

    Load ST data

    The function datasets.visium_sge() downloads the dataset from 10x Genomics and returns an AnnData object that contains counts, images and spatial coordinates. We will calculate standard QC metrics with pp.calculate_qc_metrics() and visualize them.

    @@ -330,7 +331,7 @@

    adata
    -
    AnnData object with n_obs × n_vars = 6050 × 32285
    +
    AnnData object with n_obs × n_vars = 6049 × 31053
         obs: 'in_tissue', 'array_row', 'array_col', 'library_id'
         var: 'gene_ids', 'feature_types', 'genome'
         uns: 'spatial'
    @@ -390,7 +391,7 @@ 

    adata = adata[keep,:]

    -
    5749
    +
    5725

    And replot onto tissue sections.

    @@ -449,7 +450,7 @@

    print(adata.n_obs, adata.n_vars)
    22
    -5749 32263
    +5725 31031
    @@ -478,7 +479,7 @@

    sc.tl.leiden(adata, key_added="clusters")
    computing neighbors
    -WARNING: You’re trying to run this on 2405 dimensions of `.X`, if you really want this, set `use_rep='X'`.
    +WARNING: You’re trying to run this on 2404 dimensions of `.X`, if you really want this, set `use_rep='X'`.
              Falling back to preprocessing with `sc.pp.pca` and default params.
     computing PCA
         with n_comps=50
    -    finished (0:00:00)
    +    finished (0:00:01)
         finished: added to `.uns['neighbors']`
         `.obsp['distances']`, distances for each pair of neighbors
    -    `.obsp['connectivities']`, weighted adjacency matrix (0:00:08)
    +    `.obsp['connectivities']`, weighted adjacency matrix (0:00:09)
     computing UMAP
         finished: added
         'X_umap', UMAP coordinates (adata.obsm) (0:00:09)
     running Leiden clustering
    -    finished: found 23 clusters and added
    +    finished: found 22 clusters and added
         'clusters', the cluster labels (adata.obs, categorical) (0:00:01)
    @@ -537,7 +538,7 @@

    adata, color=["clusters", "library_id"], palette=sc.pl.palettes.default_20 )
    -
    WARNING: Length of palette colors is smaller than the number of categories (palette length: 20, categories length: 23. Some categories will have the same color.
    +
    WARNING: Length of palette colors is smaller than the number of categories (palette length: 20, categories length: 22. Some categories will have the same color.
    @@ -596,13 +597,13 @@

    adatas

    -
    {'V1_Mouse_Brain_Sagittal_Anterior': View of AnnData object with n_obs × n_vars = 2597 × 2405
    +
    {'V1_Mouse_Brain_Sagittal_Anterior': View of AnnData object with n_obs × n_vars = 2590 × 2404
          obs: 'in_tissue', 'array_row', 'array_col', 'library_id', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_hb', 'pct_counts_hb', 'clusters'
          var: 'gene_ids', 'feature_types', 'genome', 'mt', 'hb', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'mean', 'std'
          uns: 'spatial', 'library_id_colors', 'log1p', 'hvg', 'neighbors', 'umap', 'leiden', 'clusters_colors'
          obsm: 'spatial', 'X_pca', 'X_umap'
          obsp: 'distances', 'connectivities',
    - 'V1_Mouse_Brain_Sagittal_Posterior': View of AnnData object with n_obs × n_vars = 3152 × 2405
    + 'V1_Mouse_Brain_Sagittal_Posterior': View of AnnData object with n_obs × n_vars = 3135 × 2404
          obs: 'in_tissue', 'array_row', 'array_col', 'library_id', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_hb', 'pct_counts_hb', 'clusters'
          var: 'gene_ids', 'feature_types', 'genome', 'mt', 'hb', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'mean', 'std'
          uns: 'spatial', 'library_id_colors', 'log1p', 'hvg', 'neighbors', 'umap', 'leiden', 'clusters_colors'
    @@ -631,14 +632,14 @@ 

    adata

    -
    Found 2405 genes among all datasets
    -[[0.         0.47824413]
    +
    Found 2404 genes among all datasets
    +[[0.         0.46409266]
      [0.         0.        ]]
     Processing datasets (0, 1)
    -(5749, 50)
    +(5725, 50)
    -
    AnnData object with n_obs × n_vars = 5749 × 2405
    +
    AnnData object with n_obs × n_vars = 5725 × 2404
         obs: 'in_tissue', 'array_row', 'array_col', 'library_id', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_hb', 'pct_counts_hb', 'clusters'
         var: 'gene_ids', 'feature_types', 'genome', 'mt', 'hb', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'mean', 'std'
         uns: 'spatial', 'library_id_colors', 'log1p', 'hvg', 'neighbors', 'umap', 'leiden', 'clusters_colors'
    @@ -662,10 +663,10 @@ 

    + finished: found 20 clusters and added + 'clusters', the cluster labels (adata.obs, categorical) (0:00:02)
    @@ -698,7 +699,7 @@

    for k, v in clusters_colors.items() if k in ad.obs.clusters.unique().tolist() ], - legend_loc="on data", + legend_loc=None, show=False, ax=axs[i], ) @@ -741,7 +742,7 @@

    with open('data/spatial/visium/scanpy_spatialde.pkl', 'wb') as file: pickle.dump(results, file)

    -
    -
    import urllib.request
    -import os
    -
    -path_data = "https://export.uppmax.uu.se/naiss2023-23-3/workshops/workshop-scrnaseq"
    +
    +
    # skip for now
    +
    +import urllib.request
    +import os
     
    -path_file = "data/spatial/visium/scanpy_spatialde.pkl"
    -if not os.path.exists(path_file):
    -    file_url = os.path.join(
    -        path_data, "spatial/visium/results/scanpy_spatialde.pkl")
    -    urllib.request.urlretrieve(file_url, path_file)
    -
    -
    -
    import pickle
    -with open('data/spatial/visium/scanpy_spatialde.pkl', 'rb') as file:
    -    results = pickle.load(file)
    -
    -
    -
    # We concatenate the results with the DataFrame of annotations of variables: `adata.var`.
    -results.index = results["g"]
    -adata.var = pd.concat(
    -    [adata.var, results.loc[adata.var.index.values, :]], axis=1)
    -adata.write_h5ad('./data/spatial/visium/adata_processed_sc.h5ad')
    -
    -# Then we can inspect significant genes that varies in space and visualize them with `sc.pl.spatial` function.
    -results.sort_values("qval").head(10)
    -
    -
    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    FSVMglmax_deltamax_llmax_mu_hatmax_s2_t_hatmodelns2_FSVs2_logdeltatimeBICmax_ll_nullLLRpvalqval
    g
    Efnb31.775241e-014Efnb3544.7336584.490546e+00-4701.6907680.9204366.950994e-02SE57498.838199e-064.029374e-040.0028179438.008662-5374.163817672.4730490.00.0
    S100a165.205988e-024S100a16544.7336581.764863e+01-4013.5338731.7034743.034361e-02SE57497.639943e-062.521451e-030.0028748061.694871-4099.97049886.4366250.00.0
    S100a53.082312e-014S100a5544.7336582.175292e+00-989.712703-0.6927403.893413e-02SE57491.244686e-053.006748e-040.0068362014.052530-2128.9165431139.2038410.00.0
    S100a61.198049e-014S100a6544.7336587.120948e+00-4911.2777570.0243944.364682e-02SE57491.525869e-051.234171e-030.0055819857.182640-5087.893957176.6161990.00.0
    Cers26.170160e-024Cers2544.7336581.473933e+01-4909.2540621.6889633.873848e-02SE57496.963833e-061.699495e-030.0026159853.135249-5025.230150115.9760880.00.0
    Car146.721518e-024Car14544.7336581.345078e+01-4078.2113271.6466343.422252e-02SE57495.827188e-061.224504e-030.0027388191.049780-4232.446324154.2349970.00.0
    Hmgcs21.043706e-014Hmgcs2544.7336588.317319e+0064.273783-0.6972469.799863e-03SE57491.273224e-051.280151e-030.002757-93.920441-106.666922170.9407040.00.0
    Atp1a11.951715e-014Atp1a1544.7336583.996873e+00-2655.884269-1.9453256.109633e-02SE57491.387274e-055.578607e-040.0024615346.395662-3228.572142572.6878730.00.0
    Vangl11.997762e-094Vangl1544.7336584.851652e+08523.655680-0.6330829.297993e-10SE57495.712059e-091.036289e+090.011918-1012.684235435.77379487.8818860.00.0
    Tspan22.008700e-014Tspan2544.7336583.855987e+00-4904.3859282.8627791.358988e-01SE57491.765625e-056.842354e-040.0021829843.398981-5358.727526454.3415980.00.0
    - -
    -
    +path_data = "https://export.uppmax.uu.se/naiss2023-23-3/workshops/workshop-scrnaseq" + +path_file = "data/spatial/visium/scanpy_spatialde.pkl" +if not os.path.exists(path_file): + file_url = os.path.join( + path_data, "spatial/visium/results/scanpy_spatialde.pkl") + urllib.request.urlretrieve(file_url, path_file)
    +
    +
    +
    # skip for now
    +
    +import pickle
    +with open('data/spatial/visium/scanpy_spatialde.pkl', 'rb') as file:
    +    results = pickle.load(file)
    +
    +
    +
    # skip for now.
    +
    +# We concatenate the results with the DataFrame of annotations of variables: `adata.var`.
    +results.index = results["g"]
    +adata.var = pd.concat(
    +    [adata.var, results.loc[adata.var.index.values, :]], axis=1)
    +adata.write_h5ad('./data/spatial/visium/adata_processed_sc.h5ad')
    +
    +# Then we can inspect significant genes that varies in space and visualize them with `sc.pl.spatial` function.
    +results.sort_values("qval").head(10)
    @@ -1113,15 +855,16 @@

    adata_cortex = sc.read_h5ad("data/spatial/visium/allen_cortex.h5ad")
     adata_cortex
    -
    +
    AnnData object with n_obs × n_vars = 14249 × 34617
         obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'sample_id', 'sample_type', 'organism', 'donor', 'sex', 'age_days', 'eye_condition', 'genotype', 'driver_lines', 'reporter_lines', 'brain_hemisphere', 'brain_region', 'brain_subregion', 'injection_label_direction', 'injection_primary', 'injection_secondary', 'injection_tract', 'injection_material', 'injection_exclusion_criterion', 'facs_date', 'facs_container', 'facs_sort_criteria', 'rna_amplification_set', 'library_prep_set', 'library_prep_avg_size_bp', 'seq_name', 'seq_tube', 'seq_batch', 'total_reads', 'percent_exon_reads', 'percent_intron_reads', 'percent_intergenic_reads', 'percent_rrna_reads', 'percent_mt_exon_reads', 'percent_reads_unique', 'percent_synth_reads', 'percent_ecoli_reads', 'percent_aligned_reads_total', 'complexity_cg', 'genes_detected_cpm_criterion', 'genes_detected_fpkm_criterion', 'tdt_cpm', 'gfp_cpm', 'class', 'subclass', 'cluster', 'confusion_score', 'cluster_correlation', 'core_intermediate_call'
         var: 'features'
    +

    Here is the metadata for the cell annotation:

    adata_cortex.obs
    -
    +
    @@ -1424,15 +1167,87 @@

    -
    sc.pp.normalize_total(adata_cortex, target_sum=1e5)
    -sc.pp.log1p(adata_cortex)
    -sc.pp.highly_variable_genes(adata_cortex, min_mean=0.0125, max_mean=3, min_disp=0.5)
    -sc.pp.scale(adata_cortex, max_value=10)
    -sc.tl.pca(adata_cortex, svd_solver='arpack')
    -sc.pp.neighbors(adata_cortex, n_pcs=30)
    -sc.tl.umap(adata_cortex)
    -sc.pl.umap(adata_cortex, color="subclass", legend_loc='on data')
    +
    adata_cortex.raw.var.index = adata_cortex.raw.var._index
    +adata_cortex.raw.var
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    _index
    _index
    0610005C13Rik0610005C13Rik
    0610006L08Rik0610006L08Rik
    0610007P14Rik0610007P14Rik
    0610009B22Rik0610009B22Rik
    0610009E02Rik0610009E02Rik
    ......
    Zzef1Zzef1
    Zzz3Zzz3
    aa
    l7Rn6l7Rn6
    n-R5s136n-R5s136
    + +

    34617 rows × 1 columns

    +
    +
    +

    +

    Then we run the regular pipeline with normalization and dimensionality reduction.

    +
    +
    sc.pp.normalize_total(adata_cortex, target_sum=1e5)
    +sc.pp.log1p(adata_cortex)
    +sc.pp.highly_variable_genes(adata_cortex, min_mean=0.0125, max_mean=3, min_disp=0.5)
    +sc.pp.scale(adata_cortex, max_value=10)
    +sc.tl.pca(adata_cortex, svd_solver='arpack')
    +sc.pp.neighbors(adata_cortex, n_pcs=30)
    +sc.tl.umap(adata_cortex)
    +sc.pl.umap(adata_cortex, color="subclass", legend_loc='on data')
    normalizing counts per cell
         finished (0:00:00)
    @@ -1447,7 +1262,7 @@ 

    -

    +

    -
    -
    adata_cortex.obs.subclass.value_counts()
    -
    +
    +
    adata_cortex.obs.subclass.value_counts()
    +
    subclass
     L6 IT         1872
     Sst           1741
    @@ -1496,19 +1311,19 @@ 

    -
    target_cells = 200
    -
    -adatas2 = [adata_cortex[adata_cortex.obs.subclass == clust] for clust in adata_cortex.obs.subclass.cat.categories]
    -
    -for dat in adatas2:
    -    if dat.n_obs > target_cells:
    -          sc.pp.subsample(dat, n_obs=target_cells)
    -
    -adata_cortex = adatas2[0].concatenate(*adatas2[1:])
    -
    -adata_cortex.obs.subclass.value_counts()
    -
    +
    +
    target_cells = 200
    +
    +adatas2 = [adata_cortex[adata_cortex.obs.subclass == clust] for clust in adata_cortex.obs.subclass.cat.categories]
    +
    +for dat in adatas2:
    +    if dat.n_obs > target_cells:
    +          sc.pp.subsample(dat, n_obs=target_cells)
    +
    +adata_cortex = adatas2[0].concatenate(*adatas2[1:])
    +
    +adata_cortex.obs.subclass.value_counts()
    +
    subclass
     Astro         200
     L6 IT         200
    @@ -1536,27 +1351,27 @@ 

    -
    sc.pl.umap(
    -    adata_cortex, color=["class", "subclass", "genotype", "brain_region"], palette=sc.pl.palettes.default_28
    -)
    +
    +
    sc.pl.umap(
    +    adata_cortex, color=["class", "subclass", "genotype", "brain_region"], palette=sc.pl.palettes.default_28
    +)
    WARNING: Length of palette colors is smaller than the number of categories (palette length: 28, categories length: 61. Some categories will have the same color.
    -

    +

    -
    -
    sc.pl.umap(adata_cortex, color="subclass", legend_loc = 'on data')
    +
    +
    sc.pl.umap(adata_cortex, color="subclass", legend_loc = 'on data')
    -

    +

    @@ -1566,32 +1381,32 @@

    5 Subset ST for cortex

    Since the scRNAseq dataset was generated from the mouse cortex, we will subset the Visium dataset in order to select mainly the spots that are part of the cortex. Note that the integration can also be performed on the whole brain slice, but it would give rise to false-positive cell type assignments and therefore it should be interpreted with more care.

    For deconvolution we will need the counts data, so we will subset from the counts_adata object that we created earlier.

    -
    -
    lib_a = "V1_Mouse_Brain_Sagittal_Anterior"
    -
    -counts_adata.obs['clusters'] = adata.obs.clusters
    -
    -adata_anterior_subset = counts_adata[
    -    (counts_adata.obs.library_id == lib_a) 
    -    & (counts_adata.obsm["spatial"][:, 1] < 6000), :
    -].copy()
    -
    -# select also the cortex clusters
    -adata_anterior_subset = adata_anterior_subset[adata_anterior_subset.obs.clusters.isin(['3','4','6','7']),:]
    -
    -# plot to check that we have the correct regions
    -
    -sc.pl.spatial(
    -    adata_anterior_subset,
    -    img_key="hires",
    -    library_id = lib_a,
    -    color=['clusters'],
    -    size=1.5
    -)
    +
    +
    lib_a = "V1_Mouse_Brain_Sagittal_Anterior"
    +
    +counts_adata.obs['clusters'] = adata.obs.clusters
    +
    +adata_anterior_subset = counts_adata[
    +    (counts_adata.obs.library_id == lib_a) 
    +    & (counts_adata.obsm["spatial"][:, 1] < 6000), :
    +].copy()
    +
    +# select also the cortex clusters
    +adata_anterior_subset = adata_anterior_subset[adata_anterior_subset.obs.clusters.isin(['3','5','6']),:]
    +
    +# plot to check that we have the correct regions
    +
    +sc.pl.spatial(
    +    adata_anterior_subset,
    +    img_key="hires",
    +    library_id = lib_a,
    +    color=['clusters'],
    +    size=1.5
    +)
    -

    +

    @@ -1605,12 +1420,11 @@

    6.1 Select genes for deconvolution

    Most deconvolution methods do a prior gene selection, and there are different options that are used: - Use variable genes in the SC data. - Use variable genes in both SC and ST data. - Use DE genes between clusters in the SC data.
    In this case we will use top DE genes per cluster, so first we have to run DGE detection on the scRNAseq data.

    -
    -
    sc.tl.rank_genes_groups(adata_cortex, 'subclass', method = "t-test", n_genes=100)
    -sc.pl.rank_genes_groups_dotplot(adata_cortex, n_genes=3)
    +
    +
    sc.tl.rank_genes_groups(adata_cortex, 'subclass', method = "t-test", n_genes=100, use_raw=False)
    +sc.pl.rank_genes_groups_dotplot(adata_cortex, n_genes=3)
    ranking genes
    -WARNING: It seems you use rank_genes_groups on the raw count data. Please logarithmize your data before calling rank_genes_groups.
         finished: added to `.uns['rank_genes_groups']`
         'names', sorted np.recarray to be indexed by group ids
         'scores', sorted np.recarray to be indexed by group ids
    @@ -1624,20 +1438,20 @@ 

    -

    +

    -
    -
    sc.tl.filter_rank_genes_groups(adata_cortex, min_fold_change=1)
    -
    -genes = sc.get.rank_genes_groups_df(adata_cortex, group = None)
    -genes
    +
    +
    sc.tl.filter_rank_genes_groups(adata_cortex, min_fold_change=1)
    +
    +genes = sc.get.rank_genes_groups_df(adata_cortex, group = None)
    +genes
    Filtering genes using: min_in_group_fraction: 0.25 min_fold_change: 1, max_out_group_fraction: 0.5
    -
    +
    @@ -1657,47 +1471,47 @@

    0 Astro -24438 -24.987505 +Slc1a3 +160.842041 NaN -3.887212e-64 -7.446797e-63 +3.174856e-300 +2.358455e-298 1 Astro -25832 -23.915390 -inf -1.620941e-60 -2.924028e-59 +Ntsr2 +150.887924 +NaN +1.097071e-231 +6.056987e-230 2 Astro -18190 -21.893034 -inf -5.711270e-55 -9.514294e-54 +Atp1a2 +138.091385 +11.185134 +0.000000e+00 +0.000000e+00 3 Astro -25833 -21.557812 -inf -5.534148e-54 -9.045118e-53 +Aldoc +131.687424 +NaN +0.000000e+00 +0.000000e+00 4 Astro -2882 -20.950775 -inf -2.972020e-53 -4.796383e-52 +Apoe +124.696922 +9.052421 +0.000000e+00 +0.000000e+00 ... @@ -1711,47 +1525,47 @@

    2295 Vip -24162 -10.645898 -193.386642 -2.184734e-21 -7.812906e-20 +Srrm4 +18.501577 +NaN +2.355896e-47 +1.812313e-45 2296 Vip -31218 -10.593735 -262.868835 -2.901054e-21 -1.025800e-19 +Dlx1as +18.485483 +NaN +7.043876e-47 +5.347322e-45 2297 Vip -18498 -10.579262 +Aplp2 +18.252756 NaN -1.337080e-21 -4.856841e-20 +1.859496e-52 +1.671953e-50 2298 Vip -24420 -10.556447 +Rit2 +18.057701 NaN -3.184234e-21 -1.123635e-19 +1.506046e-48 +1.201263e-46 2299 Vip -29115 -10.549688 -322.383911 -5.200959e-21 -1.811284e-19 +Rab3a +17.847469 +NaN +4.922345e-66 +6.788718e-64 @@ -1760,135 +1574,149 @@

    -
    -
    deg = genes.names.unique().tolist()
    -print(len(deg))
    -# check that the genes are also present in the ST data
    -
    -deg = np.intersect1d(deg,adata_anterior_subset.var.index).tolist()
    -print(len(deg))
    +
    +
    deg = genes.names.unique().tolist()
    +print(len(deg))
    +# check that the genes are also present in the ST data
    +
    +deg = np.intersect1d(deg,adata_anterior_subset.var.index).tolist()
    +print(len(deg))
    -
    1624
    -0
    +
    1388
    +1315
    -

    Train the model

    + +
    +

    6.2 Train the model

    First, train the model using scRNAseq data.

    Stereoscope requires the data to be in counts; earlier in this tutorial we saved the spatial counts in a separate object, counts_adata.

    -

    However, the single cell dataset that we downloaded only has the log-normalized data in the adata.X slot, hence we will have to recalculate the count matrix.

    -
    -
    # first do exponent and subtract pseudocount
    -E = np.exp(adata_cortex.X)-1
    -n = np.sum(E,1)
    -print(np.min(n), np.max(n))
    -# all sums to 1.7M
    -factor = np.mean(n)
    -nC = np.array(adata_cortex.obs.nCount_RNA) # true number of counts
    -scaleF = nC/factor
    -C = E * scaleF[:,None]
    -C = C.astype("int")
    -
    -
    151625.94316551366 10392533.010632813
    -
    -
    +

    In the single cell data we have the raw counts in the raw.X matrix so that one will be used. So here we create a new object with all the correct slots for scVI.

    -
    sc_adata = adata_cortex.copy()
    -sc_adata.X = C
    +
    sc_adata = adata_cortex.copy()
    +sc_adata.X = adata_cortex.raw.X.copy()

    Set up the anndata. The implementation requires the counts matrix to be in the “counts” layer as a copy.

    -
    # this chunk has issues and therefore not evaluated
    -
    -import scvi
    -# from scvi.data import register_tensor_from_anndata
    -from scvi.external import RNAStereoscope, SpatialStereoscope
    -
    -# add counts layer
    -sc_adata.layers["counts"] = sc_adata.X.copy()
    -
    -# subset for the selected genes
    -sc_adata = sc_adata[:, deg].copy()
    -
    -# create stereoscope object
    -RNAStereoscope.setup_anndata(sc_adata, layer="counts", labels_key="subclass")
    +
    import scvi
    +# from scvi.data import register_tensor_from_anndata
    +from scvi.external import RNAStereoscope, SpatialStereoscope
    +
    +# add counts layer
    +sc_adata.layers["counts"] = sc_adata.X.copy()
    +
    +# subset for the selected genes
    +sc_adata = sc_adata[:, deg].copy()
    +
    +# create stereoscope object
    +RNAStereoscope.setup_anndata(sc_adata, layer="counts", labels_key="subclass")
    -
    # this chunk has issues and therefore not evaluated
    -
    -# the model is saved to a file, so if is slow to run, you can simply reload it from disk by setting train = False
    -
    -train = True
    -if train:
    -    sc_model = RNAStereoscope(sc_adata)
    -    sc_model.train(max_epochs=300)
    -    sc_model.history["elbo_train"][10:].plot()
    -    sc_model.save("./data/spatial/visium/scmodel", overwrite=True)
    -else:
    -    sc_model = RNAStereoscope.load("./data/spatial/visium/scmodel", sc_adata)
    -    print("Loaded RNA model from file!")
    -
    -

    Predict proportions on the spatial data

    +
    # the model is saved to a file, so if it is slow to run, you can simply reload it from disk by setting train = False
    +
    +train = True
    +if train:
    +    sc_model = RNAStereoscope(sc_adata)
    +    sc_model.train(max_epochs=300)
    +    sc_model.history["elbo_train"][10:].plot()
    +    sc_model.save("./data/spatial/visium/scmodel", overwrite=True)
    +else:
    +    sc_model = RNAStereoscope.load("./data/spatial/visium/scmodel", sc_adata)
    +    print("Loaded RNA model from file!")
    +
    +
    Training:   0%|          | 0/300 [00:00<?, ?it/s]Epoch 1/300:   0%|          | 0/300 [00:00<?, ?it/s]Epoch 1/300:   0%|          | 1/300 [00:00<02:34,  1.94it/s]Epoch 1/300:   0%|          | 1/300 [00:00<02:34,  1.94it/s, v_num=1, train_loss_step=3.76e+10, train_loss_epoch=4.61e+10]Epoch 2/300:   0%|          | 1/300 [00:00<02:34,  1.94it/s, v_num=1, train_loss_step=3.76e+10, train_loss_epoch=4.61e+10]Epoch 2/300:   1%|          | 2/300 [00:00<02:13,  2.23it/s, v_num=1, train_loss_step=3.76e+10, train_loss_epoch=4.61e+10]Epoch 2/300:   1%|          | 2/300 [00:00<02:13,  2.23it/s, v_num=1, train_loss_step=2.87e+10, train_loss_epoch=3.41e+10]Epoch 3/300:   1%|          | 2/300 [00:00<02:13,  2.23it/s, v_num=1, train_loss_step=2.87e+10, train_loss_epoch=3.41e+10]Epoch 3/300:   1%|          | 3/300 [00:01<02:07,  2.33it/s, v_num=1, train_loss_step=2.87e+10, train_loss_epoch=3.41e+10]Epoch 3/300:   1%|          | 3/300 [00:01<02:07,  2.33it/s, v_num=1, train_loss_step=2.13e+10, train_loss_epoch=2.58e+10]Epoch 4/300:   1%|          | 3/300 [00:01<02:07,  2.33it/s, v_num=1, train_loss_step=2.13e+10, train_loss_epoch=2.58e+10]Epoch 4/300:   1%|▏         | 4/300 [00:01<02:02,  2.42it/s, v_num=1, train_loss_step=2.13e+10, train_loss_epoch=2.58e+10]Epoch 4/300:   1%|▏         | 4/300 [00:01<02:02,  2.42it/s, v_num=1, train_loss_step=1.65e+10, train_loss_epoch=2e+10]   Epoch 5/300:   1%|▏         | 4/300 [00:01<02:02,  2.42it/s, v_num=1, train_loss_step=1.65e+10, train_loss_epoch=2e+10]Epoch 5/300:   2%|▏         | 5/300 [00:02<02:01,  2.43it/s, v_num=1, train_loss_step=1.65e+10, train_loss_epoch=2e+10]Epoch 5/300:   2%|▏         | 5/300 [00:02<02:01,  2.43it/s, v_num=1, train_loss_step=1.39e+10, train_loss_epoch=1.58e+10]Epoch 6/300:   2%|▏         | 5/300 [00:02<02:01,  2.43it/s, v_num=1, train_loss_step=1.39e+10, train_loss_epoch=1.58e+10]Epoch 6/300:   2%|▏         | 6/300 [00:02<02:00,  2.44it/s, v_num=1, train_loss_step=1.39e+10, train_loss_epoch=1.58e+10]Epoch 
6/300:   2%|▏         | 6/300 [00:02<02:00,  2.44it/s, v_num=1, train_loss_step=1.13e+10, train_loss_epoch=1.28e+10]Epoch 7/300:   2%|▏         | 6/300 [00:02<02:00,  2.44it/s, v_num=1, train_loss_step=1.13e+10, train_loss_epoch=1.28e+10]Epoch 7/300:   2%|▏         | 7/300 [00:02<01:57,  2.49it/s, v_num=1, train_loss_step=1.13e+10, train_loss_epoch=1.28e+10]Epoch 7/300:   2%|▏         | 7/300 [00:02<01:57,  2.49it/s, v_num=1, train_loss_step=9.1e+9, train_loss_epoch=1.05e+10]  Epoch 8/300:   2%|▏         | 7/300 [00:02<01:57,  2.49it/s, v_num=1, train_loss_step=9.1e+9, train_loss_epoch=1.05e+10]Epoch 8/300:   3%|▎         | 8/300 [00:03<01:55,  2.52it/s, v_num=1, train_loss_step=9.1e+9, train_loss_epoch=1.05e+10]Epoch 8/300:   3%|▎         | 8/300 [00:03<01:55,  2.52it/s, v_num=1, train_loss_step=7.64e+9, train_loss_epoch=8.79e+9]Epoch 9/300:   3%|▎         | 8/300 [00:03<01:55,  2.52it/s, v_num=1, train_loss_step=7.64e+9, train_loss_epoch=8.79e+9]Epoch 9/300:   3%|▎         | 9/300 [00:03<01:54,  2.53it/s, v_num=1, train_loss_step=7.64e+9, train_loss_epoch=8.79e+9]Epoch 9/300:   3%|▎         | 9/300 [00:03<01:54,  2.53it/s, v_num=1, train_loss_step=6.73e+9, train_loss_epoch=7.44e+9]Epoch 10/300:   3%|▎         | 9/300 [00:03<01:54,  2.53it/s, v_num=1, train_loss_step=6.73e+9, train_loss_epoch=7.44e+9]Epoch 10/300:   3%|▎         | 10/300 [00:04<01:53,  2.55it/s, v_num=1, train_loss_step=6.73e+9, train_loss_epoch=7.44e+9]Epoch 10/300:   3%|▎         | 10/300 [00:04<01:53,  2.55it/s, v_num=1, train_loss_step=5.55e+9, train_loss_epoch=6.37e+9]Epoch 11/300:   3%|▎         | 10/300 [00:04<01:53,  2.55it/s, v_num=1, train_loss_step=5.55e+9, train_loss_epoch=6.37e+9]Epoch 11/300:   4%|▎         | 11/300 [00:04<01:52,  2.57it/s, v_num=1, train_loss_step=5.55e+9, train_loss_epoch=6.37e+9]Epoch 11/300:   4%|▎         | 11/300 [00:04<01:52,  2.57it/s, v_num=1, train_loss_step=5.03e+9, train_loss_epoch=5.51e+9]Epoch 12/300:   4%|▎         | 11/300 [00:04<01:52,  2.57it/s, 
v_num=1, train_loss_step=5.03e+9, train_loss_epoch=5.51e+9]Epoch 12/300:   4%|▍         | 12/300 [00:04<01:51,  2.58it/s, v_num=1, train_loss_step=5.03e+9, train_loss_epoch=5.51e+9]Epoch 12/300:   4%|▍         | 12/300 [00:04<01:51,  2.58it/s, v_num=1, train_loss_step=4.43e+9, train_loss_epoch=4.81e+9]Epoch 13/300:   4%|▍         | 12/300 [00:04<01:51,  2.58it/s, v_num=1, train_loss_step=4.43e+9, train_loss_epoch=4.81e+9]Epoch 13/300:   4%|▍         | 13/300 [00:05<01:50,  2.59it/s, v_num=1, train_loss_step=4.43e+9, train_loss_epoch=4.81e+9]Epoch 13/300:   4%|▍         | 13/300 [00:05<01:50,  2.59it/s, v_num=1, train_loss_step=3.86e+9, train_loss_epoch=4.23e+9]Epoch 14/300:   4%|▍         | 13/300 [00:05<01:50,  2.59it/s, v_num=1, train_loss_step=3.86e+9, train_loss_epoch=4.23e+9]Epoch 14/300:   5%|▍         | 14/300 [00:05<01:50,  2.59it/s, v_num=1, train_loss_step=3.86e+9, train_loss_epoch=4.23e+9]Epoch 14/300:   5%|▍         | 14/300 [00:05<01:50,  2.59it/s, v_num=1, train_loss_step=3.3e+9, train_loss_epoch=3.75e+9] Epoch 15/300:   5%|▍         | 14/300 [00:05<01:50,  2.59it/s, v_num=1, train_loss_step=3.3e+9, train_loss_epoch=3.75e+9]Epoch 15/300:   5%|▌         | 15/300 [00:05<01:49,  2.60it/s, v_num=1, train_loss_step=3.3e+9, train_loss_epoch=3.75e+9]Epoch 15/300:   5%|▌         | 15/300 [00:05<01:49,  2.60it/s, v_num=1, train_loss_step=3.01e+9, train_loss_epoch=3.35e+9]Epoch 16/300:   5%|▌         | 15/300 [00:05<01:49,  2.60it/s, v_num=1, train_loss_step=3.01e+9, train_loss_epoch=3.35e+9]Epoch 16/300:   5%|▌         | 16/300 [00:06<01:49,  2.60it/s, v_num=1, train_loss_step=3.01e+9, train_loss_epoch=3.35e+9]Epoch 16/300:   5%|▌         | 16/300 [00:06<01:49,  2.60it/s, v_num=1, train_loss_step=2.82e+9, train_loss_epoch=3e+9]   Epoch 17/300:   5%|▌         | 16/300 [00:06<01:49,  2.60it/s, v_num=1, train_loss_step=2.82e+9, train_loss_epoch=3e+9]Epoch 17/300:   6%|▌         | 17/300 [00:06<01:49,  2.59it/s, v_num=1, train_loss_step=2.82e+9, 
train_loss_epoch=3e+9]Epoch 17/300:   6%|▌         | 17/300 [00:06<01:49,  2.59it/s, v_num=1, train_loss_step=2.46e+9, train_loss_epoch=2.71e+9]Epoch 18/300:   6%|▌         | 17/300 [00:06<01:49,  2.59it/s, v_num=1, train_loss_step=2.46e+9, train_loss_epoch=2.71e+9]Epoch 18/300:   6%|▌         | 18/300 [00:07<01:48,  2.60it/s, v_num=1, train_loss_step=2.46e+9, train_loss_epoch=2.71e+9]Epoch 18/300:   6%|▌         | 18/300 [00:07<01:48,  2.60it/s, v_num=1, train_loss_step=2.19e+9, train_loss_epoch=2.46e+9]Epoch 19/300:   6%|▌         | 18/300 [00:07<01:48,  2.60it/s, v_num=1, train_loss_step=2.19e+9, train_loss_epoch=2.46e+9]Epoch 19/300:   6%|▋         | 19/300 [00:07<01:49,  2.56it/s, v_num=1, train_loss_step=2.19e+9, train_loss_epoch=2.46e+9]Epoch 19/300:   6%|▋         | 19/300 [00:07<01:49,  2.56it/s, v_num=1, train_loss_step=2.07e+9, train_loss_epoch=2.24e+9]Epoch 20/300:   6%|▋         | 19/300 [00:07<01:49,  2.56it/s, v_num=1, train_loss_step=2.07e+9, train_loss_epoch=2.24e+9]Epoch 20/300:   7%|▋         | 20/300 [00:07<01:48,  2.58it/s, v_num=1, train_loss_step=2.07e+9, train_loss_epoch=2.24e+9]Epoch 20/300:   7%|▋         | 20/300 [00:07<01:48,  2.58it/s, v_num=1, train_loss_step=1.86e+9, train_loss_epoch=2.05e+9]Epoch 21/300:   7%|▋         | 20/300 [00:07<01:48,  2.58it/s, v_num=1, train_loss_step=1.86e+9, train_loss_epoch=2.05e+9]Epoch 21/300:   7%|▋         | 21/300 [00:08<01:47,  2.60it/s, v_num=1, train_loss_step=1.86e+9, train_loss_epoch=2.05e+9]Epoch 21/300:   7%|▋         | 21/300 [00:08<01:47,  2.60it/s, v_num=1, train_loss_step=1.73e+9, train_loss_epoch=1.88e+9]Epoch 22/300:   7%|▋         | 21/300 [00:08<01:47,  2.60it/s, v_num=1, train_loss_step=1.73e+9, train_loss_epoch=1.88e+9]Epoch 22/300:   7%|▋         | 22/300 [00:08<01:47,  2.59it/s, v_num=1, train_loss_step=1.73e+9, train_loss_epoch=1.88e+9]Epoch 22/300:   7%|▋         | 22/300 [00:08<01:47,  2.59it/s, v_num=1, train_loss_step=1.56e+9, train_loss_epoch=1.73e+9]Epoch 23/300:   7%|▋      
   | 22/300 [00:08<01:47,  2.59it/s, v_num=1, train_loss_step=1.56e+9, train_loss_epoch=1.73e+9]Epoch 23/300:   8%|▊         | 23/300 [00:09<01:46,  2.61it/s, v_num=1, train_loss_step=1.56e+9, train_loss_epoch=1.73e+9]Epoch 23/300:   8%|▊         | 23/300 [00:09<01:46,  2.61it/s, v_num=1, train_loss_step=1.48e+9, train_loss_epoch=1.6e+9] Epoch 24/300:   8%|▊         | 23/300 [00:09<01:46,  2.61it/s, v_num=1, train_loss_step=1.48e+9, train_loss_epoch=1.6e+9]Epoch 24/300:   8%|▊         | 24/300 [00:09<01:46,  2.60it/s, v_num=1, train_loss_step=1.48e+9, train_loss_epoch=1.6e+9]Epoch 24/300:   8%|▊         | 24/300 [00:09<01:46,  2.60it/s, v_num=1, train_loss_step=1.43e+9, train_loss_epoch=1.48e+9]Epoch 25/300:   8%|▊         | 24/300 [00:09<01:46,  2.60it/s, v_num=1, train_loss_step=1.43e+9, train_loss_epoch=1.48e+9]Epoch 25/300:   8%|▊         | 25/300 [00:09<01:44,  2.62it/s, v_num=1, train_loss_step=1.43e+9, train_loss_epoch=1.48e+9]Epoch 25/300:   8%|▊         | 25/300 [00:09<01:44,  2.62it/s, v_num=1, train_loss_step=1.26e+9, train_loss_epoch=1.38e+9]Epoch 26/300:   8%|▊         | 25/300 [00:09<01:44,  2.62it/s, v_num=1, train_loss_step=1.26e+9, train_loss_epoch=1.38e+9]Epoch 26/300:   9%|▊         | 26/300 [00:10<01:43,  2.64it/s, v_num=1, train_loss_step=1.26e+9, train_loss_epoch=1.38e+9]Epoch 26/300:   9%|▊         | 26/300 [00:10<01:43,  2.64it/s, v_num=1, train_loss_step=1.12e+9, train_loss_epoch=1.28e+9]Epoch 27/300:   9%|▊         | 26/300 [00:10<01:43,  2.64it/s, v_num=1, train_loss_step=1.12e+9, train_loss_epoch=1.28e+9]Epoch 27/300:   9%|▉         | 27/300 [00:10<01:43,  2.64it/s, v_num=1, train_loss_step=1.12e+9, train_loss_epoch=1.28e+9]Epoch 27/300:   9%|▉         | 27/300 [00:10<01:43,  2.64it/s, v_num=1, train_loss_step=1.16e+9, train_loss_epoch=1.2e+9] Epoch 28/300:   9%|▉         | 27/300 [00:10<01:43,  2.64it/s, v_num=1, train_loss_step=1.16e+9, train_loss_epoch=1.2e+9]Epoch 28/300:   9%|▉         | 28/300 [00:10<01:43,  2.64it/s, v_num=1, 
train_loss_step=1.16e+9, train_loss_epoch=1.2e+9]Epoch 28/300:   9%|▉         | 28/300 [00:10<01:43,  2.64it/s, v_num=1, train_loss_step=1.05e+9, train_loss_epoch=1.12e+9]Epoch 29/300:   9%|▉         | 28/300 [00:10<01:43,  2.64it/s, v_num=1, train_loss_step=1.05e+9, train_loss_epoch=1.12e+9]Epoch 29/300:  10%|▉         | 29/300 [00:11<01:43,  2.63it/s, v_num=1, train_loss_step=1.05e+9, train_loss_epoch=1.12e+9]Epoch 29/300:  10%|▉         | 29/300 [00:11<01:43,  2.63it/s, v_num=1, train_loss_step=9.91e+8, train_loss_epoch=1.05e+9]Epoch 30/300:  10%|▉         | 29/300 [00:11<01:43,  2.63it/s, v_num=1, train_loss_step=9.91e+8, train_loss_epoch=1.05e+9]Epoch 30/300:  10%|█         | 30/300 [00:11<01:42,  2.63it/s, v_num=1, train_loss_step=9.91e+8, train_loss_epoch=1.05e+9]Epoch 30/300:  10%|█         | 30/300 [00:11<01:42,  2.63it/s, v_num=1, train_loss_step=9.33e+8, train_loss_epoch=9.86e+8]Epoch 31/300:  10%|█         | 30/300 [00:11<01:42,  2.63it/s, v_num=1, train_loss_step=9.33e+8, train_loss_epoch=9.86e+8]Epoch 31/300:  10%|█         | 31/300 [00:12<01:41,  2.64it/s, v_num=1, train_loss_step=9.33e+8, train_loss_epoch=9.86e+8]Epoch 31/300:  10%|█         | 31/300 [00:12<01:41,  2.64it/s, v_num=1, train_loss_step=8.15e+8, train_loss_epoch=9.28e+8]Epoch 32/300:  10%|█         | 31/300 [00:12<01:41,  2.64it/s, v_num=1, train_loss_step=8.15e+8, train_loss_epoch=9.28e+8]Epoch 32/300:  11%|█         | 32/300 [00:12<01:42,  2.61it/s, v_num=1, train_loss_step=8.15e+8, train_loss_epoch=9.28e+8]Epoch 32/300:  11%|█         | 32/300 [00:12<01:42,  2.61it/s, v_num=1, train_loss_step=7.95e+8, train_loss_epoch=8.74e+8]Epoch 33/300:  11%|█         | 32/300 [00:12<01:42,  2.61it/s, v_num=1, train_loss_step=7.95e+8, train_loss_epoch=8.74e+8]Epoch 33/300:  11%|█         | 33/300 [00:12<01:43,  2.59it/s, v_num=1, train_loss_step=7.95e+8, train_loss_epoch=8.74e+8]Epoch 33/300:  11%|█         | 33/300 [00:12<01:43,  2.59it/s, v_num=1, train_loss_step=7.73e+8, 
train_loss_epoch=8.26e+8]Epoch 34/300:  11%|█         | 33/300 [00:12<01:43,  2.59it/s, v_num=1, train_loss_step=7.73e+8, train_loss_epoch=8.26e+8]Epoch 34/300:  11%|█▏        | 34/300 [00:13<01:41,  2.61it/s, v_num=1, train_loss_step=7.73e+8, train_loss_epoch=8.26e+8]Epoch 34/300:  11%|█▏        | 34/300 [00:13<01:41,  2.61it/s, v_num=1, train_loss_step=7.08e+8, train_loss_epoch=7.81e+8]Epoch 35/300:  11%|█▏        | 34/300 [00:13<01:41,  2.61it/s, v_num=1, train_loss_step=7.08e+8, train_loss_epoch=7.81e+8]Epoch 35/300:  12%|█▏        | 35/300 [00:13<01:40,  2.63it/s, v_num=1, train_loss_step=7.08e+8, train_loss_epoch=7.81e+8]Epoch 35/300:  12%|█▏        | 35/300 [00:13<01:40,  2.63it/s, v_num=1, train_loss_step=7.04e+8, train_loss_epoch=7.39e+8]Epoch 36/300:  12%|█▏        | 35/300 [00:13<01:40,  2.63it/s, v_num=1, train_loss_step=7.04e+8, train_loss_epoch=7.39e+8]Epoch 36/300:  12%|█▏        | 36/300 [00:14<01:40,  2.63it/s, v_num=1, train_loss_step=7.04e+8, train_loss_epoch=7.39e+8]Epoch 36/300:  12%|█▏        | 36/300 [00:14<01:40,  2.63it/s, v_num=1, train_loss_step=6.5e+8, train_loss_epoch=7.01e+8] Epoch 37/300:  12%|█▏        | 36/300 [00:14<01:40,  2.63it/s, v_num=1, train_loss_step=6.5e+8, train_loss_epoch=7.01e+8]Epoch 37/300:  12%|█▏        | 37/300 [00:14<01:40,  2.63it/s, v_num=1, train_loss_step=6.5e+8, train_loss_epoch=7.01e+8]Epoch 37/300:  12%|█▏        | 37/300 [00:14<01:40,  2.63it/s, v_num=1, train_loss_step=6.23e+8, train_loss_epoch=6.66e+8]Epoch 38/300:  12%|█▏        | 37/300 [00:14<01:40,  2.63it/s, v_num=1, train_loss_step=6.23e+8, train_loss_epoch=6.66e+8]Epoch 38/300:  13%|█▎        | 38/300 [00:14<01:39,  2.63it/s, v_num=1, train_loss_step=6.23e+8, train_loss_epoch=6.66e+8]Epoch 38/300:  13%|█▎        | 38/300 [00:14<01:39,  2.63it/s, v_num=1, train_loss_step=6e+8, train_loss_epoch=6.33e+8]   Epoch 39/300:  13%|█▎        | 38/300 [00:14<01:39,  2.63it/s, v_num=1, train_loss_step=6e+8, train_loss_epoch=6.33e+8]Epoch 39/300:  13%|█▎       
 | 39/300 [00:15<01:39,  2.64it/s, v_num=1, train_loss_step=6e+8, train_loss_epoch=6.33e+8]Epoch 39/300:  13%|█▎        | 39/300 [00:15<01:39,  2.64it/s, v_num=1, train_loss_step=5.98e+8, train_loss_epoch=6.02e+8]Epoch 40/300:  13%|█▎        | 39/300 [00:15<01:39,  2.64it/s, v_num=1, train_loss_step=5.98e+8, train_loss_epoch=6.02e+8]Epoch 40/300:  13%|█▎        | 40/300 [00:15<01:38,  2.64it/s, v_num=1, train_loss_step=5.98e+8, train_loss_epoch=6.02e+8]Epoch 40/300:  13%|█▎        | 40/300 [00:15<01:38,  2.64it/s, v_num=1, train_loss_step=5.24e+8, train_loss_epoch=5.74e+8]Epoch 41/300:  13%|█▎        | 40/300 [00:15<01:38,  2.64it/s, v_num=1, train_loss_step=5.24e+8, train_loss_epoch=5.74e+8]Epoch 41/300:  14%|█▎        | 41/300 [00:15<01:38,  2.63it/s, v_num=1, train_loss_step=5.24e+8, train_loss_epoch=5.74e+8]Epoch 41/300:  14%|█▎        | 41/300 [00:15<01:38,  2.63it/s, v_num=1, train_loss_step=5.13e+8, train_loss_epoch=5.47e+8]Epoch 42/300:  14%|█▎        | 41/300 [00:15<01:38,  2.63it/s, v_num=1, train_loss_step=5.13e+8, train_loss_epoch=5.47e+8]Epoch 42/300:  14%|█▍        | 42/300 [00:16<01:38,  2.63it/s, v_num=1, train_loss_step=5.13e+8, train_loss_epoch=5.47e+8]Epoch 42/300:  14%|█▍        | 42/300 [00:16<01:38,  2.63it/s, v_num=1, train_loss_step=4.91e+8, train_loss_epoch=5.22e+8]Epoch 43/300:  14%|█▍        | 42/300 [00:16<01:38,  2.63it/s, v_num=1, train_loss_step=4.91e+8, train_loss_epoch=5.22e+8]Epoch 43/300:  14%|█▍        | 43/300 [00:16<01:37,  2.65it/s, v_num=1, train_loss_step=4.91e+8, train_loss_epoch=5.22e+8]Epoch 43/300:  14%|█▍        | 43/300 [00:16<01:37,  2.65it/s, v_num=1, train_loss_step=4.77e+8, train_loss_epoch=4.99e+8]Epoch 44/300:  14%|█▍        | 43/300 [00:16<01:37,  2.65it/s, v_num=1, train_loss_step=4.77e+8, train_loss_epoch=4.99e+8]Epoch 44/300:  15%|█▍        | 44/300 [00:17<01:36,  2.66it/s, v_num=1, train_loss_step=4.77e+8, train_loss_epoch=4.99e+8]Epoch 44/300:  15%|█▍        | 44/300 [00:17<01:36,  2.66it/s, v_num=1, 
train_loss_step=4.39e+8, train_loss_epoch=4.77e+8]Epoch 45/300:  15%|█▍        | 44/300 [00:17<01:36,  2.66it/s, v_num=1, train_loss_step=4.39e+8, train_loss_epoch=4.77e+8]Epoch 45/300:  15%|█▌        | 45/300 [00:17<01:36,  2.65it/s, v_num=1, train_loss_step=4.39e+8, train_loss_epoch=4.77e+8]Epoch 45/300:  15%|█▌        | 45/300 [00:17<01:36,  2.65it/s, v_num=1, train_loss_step=4.27e+8, train_loss_epoch=4.57e+8]Epoch 46/300:  15%|█▌        | 45/300 [00:17<01:36,  2.65it/s, v_num=1, train_loss_step=4.27e+8, train_loss_epoch=4.57e+8]Epoch 46/300:  15%|█▌        | 46/300 [00:17<01:42,  2.48it/s, v_num=1, train_loss_step=4.27e+8, train_loss_epoch=4.57e+8]Epoch 46/300:  15%|█▌        | 46/300 [00:17<01:42,  2.48it/s, v_num=1, train_loss_step=4.31e+8, train_loss_epoch=4.38e+8]Epoch 47/300:  15%|█▌        | 46/300 [00:17<01:42,  2.48it/s, v_num=1, train_loss_step=4.31e+8, train_loss_epoch=4.38e+8]Epoch 47/300:  16%|█▌        | 47/300 [00:18<01:41,  2.50it/s, v_num=1, train_loss_step=4.31e+8, train_loss_epoch=4.38e+8]Epoch 47/300:  16%|█▌        | 47/300 [00:18<01:41,  2.50it/s, v_num=1, train_loss_step=3.85e+8, train_loss_epoch=4.2e+8] Epoch 48/300:  16%|█▌        | 47/300 [00:18<01:41,  2.50it/s, v_num=1, train_loss_step=3.85e+8, train_loss_epoch=4.2e+8]Epoch 48/300:  16%|█▌        | 48/300 [00:18<01:39,  2.54it/s, v_num=1, train_loss_step=3.85e+8, train_loss_epoch=4.2e+8]Epoch 48/300:  16%|█▌        | 48/300 [00:18<01:39,  2.54it/s, v_num=1, train_loss_step=3.8e+8, train_loss_epoch=4.03e+8]Epoch 49/300:  16%|█▌        | 48/300 [00:18<01:39,  2.54it/s, v_num=1, train_loss_step=3.8e+8, train_loss_epoch=4.03e+8]Epoch 49/300:  16%|█▋        | 49/300 [00:19<01:41,  2.48it/s, v_num=1, train_loss_step=3.8e+8, train_loss_epoch=4.03e+8]Epoch 49/300:  16%|█▋        | 49/300 [00:19<01:41,  2.48it/s, v_num=1, train_loss_step=3.7e+8, train_loss_epoch=3.87e+8]Epoch 50/300:  16%|█▋        | 49/300 [00:19<01:41,  2.48it/s, v_num=1, train_loss_step=3.7e+8, 
train_loss_epoch=3.87e+8]Epoch 50/300:  17%|█▋        | 50/300 [00:19<01:39,  2.52it/s, v_num=1, train_loss_step=3.7e+8, train_loss_epoch=3.87e+8]Epoch 50/300:  17%|█▋        | 50/300 [00:19<01:39,  2.52it/s, v_num=1, train_loss_step=3.57e+8, train_loss_epoch=3.72e+8]Epoch 51/300:  17%|█▋        | 50/300 [00:19<01:39,  2.52it/s, v_num=1, train_loss_step=3.57e+8, train_loss_epoch=3.72e+8]Epoch 51/300:  17%|█▋        | 51/300 [00:19<01:37,  2.56it/s, v_num=1, train_loss_step=3.57e+8, train_loss_epoch=3.72e+8]Epoch 51/300:  17%|█▋        | 51/300 [00:19<01:37,  2.56it/s, v_num=1, train_loss_step=3.25e+8, train_loss_epoch=3.58e+8]Epoch 52/300:  17%|█▋        | 51/300 [00:19<01:37,  2.56it/s, v_num=1, train_loss_step=3.25e+8, train_loss_epoch=3.58e+8]Epoch 52/300:  17%|█▋        | 52/300 [00:20<01:36,  2.57it/s, v_num=1, train_loss_step=3.25e+8, train_loss_epoch=3.58e+8]Epoch 52/300:  17%|█▋        | 52/300 [00:20<01:36,  2.57it/s, v_num=1, train_loss_step=3.33e+8, train_loss_epoch=3.44e+8]Epoch 53/300:  17%|█▋        | 52/300 [00:20<01:36,  2.57it/s, v_num=1, train_loss_step=3.33e+8, train_loss_epoch=3.44e+8]Epoch 53/300:  18%|█▊        | 53/300 [00:20<01:34,  2.61it/s, v_num=1, train_loss_step=3.33e+8, train_loss_epoch=3.44e+8]Epoch 53/300:  18%|█▊        | 53/300 [00:20<01:34,  2.61it/s, v_num=1, train_loss_step=3.23e+8, train_loss_epoch=3.31e+8]Epoch 54/300:  18%|█▊        | 53/300 [00:20<01:34,  2.61it/s, v_num=1, train_loss_step=3.23e+8, train_loss_epoch=3.31e+8]Epoch 54/300:  18%|█▊        | 54/300 [00:20<01:35,  2.59it/s, v_num=1, train_loss_step=3.23e+8, train_loss_epoch=3.31e+8]Epoch 54/300:  18%|█▊        | 54/300 [00:20<01:35,  2.59it/s, v_num=1, train_loss_step=3.02e+8, train_loss_epoch=3.19e+8]Epoch 55/300:  18%|█▊        | 54/300 [00:20<01:35,  2.59it/s, v_num=1, train_loss_step=3.02e+8, train_loss_epoch=3.19e+8]Epoch 55/300:  18%|█▊        | 55/300 [00:21<01:34,  2.60it/s, v_num=1, train_loss_step=3.02e+8, train_loss_epoch=3.19e+8]Epoch 55/300:  18%|█▊   
     | 55/300 [00:21<01:34,  2.60it/s, v_num=1, train_loss_step=2.86e+8, train_loss_epoch=3.08e+8]Epoch 56/300:  18%|█▊        | 55/300 [00:21<01:34,  2.60it/s, v_num=1, train_loss_step=2.86e+8, train_loss_epoch=3.08e+8]Epoch 56/300:  19%|█▊        | 56/300 [00:21<01:33,  2.61it/s, v_num=1, train_loss_step=2.86e+8, train_loss_epoch=3.08e+8]Epoch 56/300:  19%|█▊        | 56/300 [00:21<01:33,  2.61it/s, v_num=1, train_loss_step=2.91e+8, train_loss_epoch=2.97e+8]Epoch 57/300:  19%|█▊        | 56/300 [00:21<01:33,  2.61it/s, v_num=1, train_loss_step=2.91e+8, train_loss_epoch=2.97e+8]Epoch 57/300:  19%|█▉        | 57/300 [00:22<01:32,  2.63it/s, v_num=1, train_loss_step=2.91e+8, train_loss_epoch=2.97e+8]Epoch 57/300:  19%|█▉        | 57/300 [00:22<01:32,  2.63it/s, v_num=1, train_loss_step=2.69e+8, train_loss_epoch=2.87e+8]Epoch 58/300:  19%|█▉        | 57/300 [00:22<01:32,  2.63it/s, v_num=1, train_loss_step=2.69e+8, train_loss_epoch=2.87e+8]Epoch 58/300:  19%|█▉        | 58/300 [00:22<01:32,  2.62it/s, v_num=1, train_loss_step=2.69e+8, train_loss_epoch=2.87e+8]Epoch 58/300:  19%|█▉        | 58/300 [00:22<01:32,  2.62it/s, v_num=1, train_loss_step=2.57e+8, train_loss_epoch=2.77e+8]Epoch 59/300:  19%|█▉        | 58/300 [00:22<01:32,  2.62it/s, v_num=1, train_loss_step=2.57e+8, train_loss_epoch=2.77e+8]Epoch 59/300:  20%|█▉        | 59/300 [00:22<01:32,  2.60it/s, v_num=1, train_loss_step=2.57e+8, train_loss_epoch=2.77e+8]Epoch 59/300:  20%|█▉        | 59/300 [00:22<01:32,  2.60it/s, v_num=1, train_loss_step=2.48e+8, train_loss_epoch=2.67e+8]Epoch 60/300:  20%|█▉        | 59/300 [00:22<01:32,  2.60it/s, v_num=1, train_loss_step=2.48e+8, train_loss_epoch=2.67e+8]Epoch 60/300:  20%|██        | 60/300 [00:23<01:32,  2.58it/s, v_num=1, train_loss_step=2.48e+8, train_loss_epoch=2.67e+8]Epoch 60/300:  20%|██        | 60/300 [00:23<01:32,  2.58it/s, v_num=1, train_loss_step=2.41e+8, train_loss_epoch=2.59e+8]Epoch 61/300:  20%|██        | 60/300 [00:23<01:32,  2.58it/s, v_num=1, 
train_loss_step=2.41e+8, train_loss_epoch=2.59e+8]Epoch 61/300:  20%|██        | 61/300 [00:23<01:32,  2.57it/s, v_num=1, train_loss_step=2.41e+8, train_loss_epoch=2.59e+8]Epoch 61/300:  20%|██        | 61/300 [00:23<01:32,  2.57it/s, v_num=1, train_loss_step=2.44e+8, train_loss_epoch=2.5e+8] Epoch 62/300:  20%|██        | 61/300 [00:23<01:32,  2.57it/s, v_num=1, train_loss_step=2.44e+8, train_loss_epoch=2.5e+8]Epoch 62/300:  21%|██        | 62/300 [00:24<01:31,  2.60it/s, v_num=1, train_loss_step=2.44e+8, train_loss_epoch=2.5e+8]Epoch 62/300:  21%|██        | 62/300 [00:24<01:31,  2.60it/s, v_num=1, train_loss_step=2.28e+8, train_loss_epoch=2.42e+8]Epoch 63/300:  21%|██        | 62/300 [00:24<01:31,  2.60it/s, v_num=1, train_loss_step=2.28e+8, train_loss_epoch=2.42e+8]Epoch 63/300:  21%|██        | 63/300 [00:24<01:30,  2.61it/s, v_num=1, train_loss_step=2.28e+8, train_loss_epoch=2.42e+8]Epoch 63/300:  21%|██        | 63/300 [00:24<01:30,  2.61it/s, v_num=1, train_loss_step=2.32e+8, train_loss_epoch=2.34e+8]Epoch 64/300:  21%|██        | 63/300 [00:24<01:30,  2.61it/s, v_num=1, train_loss_step=2.32e+8, train_loss_epoch=2.34e+8]Epoch 64/300:  21%|██▏       | 64/300 [00:24<01:30,  2.62it/s, v_num=1, train_loss_step=2.32e+8, train_loss_epoch=2.34e+8]Epoch 64/300:  21%|██▏       | 64/300 [00:24<01:30,  2.62it/s, v_num=1, train_loss_step=2.08e+8, train_loss_epoch=2.27e+8]Epoch 65/300:  21%|██▏       | 64/300 [00:24<01:30,  2.62it/s, v_num=1, train_loss_step=2.08e+8, train_loss_epoch=2.27e+8]Epoch 65/300:  22%|██▏       | 65/300 [00:25<01:29,  2.62it/s, v_num=1, train_loss_step=2.08e+8, train_loss_epoch=2.27e+8]Epoch 65/300:  22%|██▏       | 65/300 [00:25<01:29,  2.62it/s, v_num=1, train_loss_step=2.2e+8, train_loss_epoch=2.2e+8]  Epoch 66/300:  22%|██▏       | 65/300 [00:25<01:29,  2.62it/s, v_num=1, train_loss_step=2.2e+8, train_loss_epoch=2.2e+8]Epoch 66/300:  22%|██▏       | 66/300 [00:25<01:28,  2.63it/s, v_num=1, train_loss_step=2.2e+8, 
train_loss_epoch=2.2e+8]Epoch 66/300:  22%|██▏       | 66/300 [00:25<01:28,  2.63it/s, v_num=1, train_loss_step=2.04e+8, train_loss_epoch=2.13e+8]Epoch 67/300:  22%|██▏       | 66/300 [00:25<01:28,  2.63it/s, v_num=1, train_loss_step=2.04e+8, train_loss_epoch=2.13e+8]Epoch 67/300:  22%|██▏       | 67/300 [00:25<01:28,  2.63it/s, v_num=1, train_loss_step=2.04e+8, train_loss_epoch=2.13e+8]Epoch 67/300:  22%|██▏       | 67/300 [00:25<01:28,  2.63it/s, v_num=1, train_loss_step=1.84e+8, train_loss_epoch=2.07e+8]Epoch 68/300:  22%|██▏       | 67/300 [00:25<01:28,  2.63it/s, v_num=1, train_loss_step=1.84e+8, train_loss_epoch=2.07e+8]Epoch 68/300:  23%|██▎       | 68/300 [00:26<01:28,  2.64it/s, v_num=1, train_loss_step=1.84e+8, train_loss_epoch=2.07e+8]Epoch 68/300:  23%|██▎       | 68/300 [00:26<01:28,  2.64it/s, v_num=1, train_loss_step=1.86e+8, train_loss_epoch=2e+8]   Epoch 69/300:  23%|██▎       | 68/300 [00:26<01:28,  2.64it/s, v_num=1, train_loss_step=1.86e+8, train_loss_epoch=2e+8]Epoch 69/300:  23%|██▎       | 69/300 [00:26<01:27,  2.64it/s, v_num=1, train_loss_step=1.86e+8, train_loss_epoch=2e+8]Epoch 69/300:  23%|██▎       | 69/300 [00:26<01:27,  2.64it/s, v_num=1, train_loss_step=1.82e+8, train_loss_epoch=1.95e+8]Epoch 70/300:  23%|██▎       | 69/300 [00:26<01:27,  2.64it/s, v_num=1, train_loss_step=1.82e+8, train_loss_epoch=1.95e+8]Epoch 70/300:  23%|██▎       | 70/300 [00:27<01:27,  2.64it/s, v_num=1, train_loss_step=1.82e+8, train_loss_epoch=1.95e+8]Epoch 70/300:  23%|██▎       | 70/300 [00:27<01:27,  2.64it/s, v_num=1, train_loss_step=1.73e+8, train_loss_epoch=1.89e+8]Epoch 71/300:  23%|██▎       | 70/300 [00:27<01:27,  2.64it/s, v_num=1, train_loss_step=1.73e+8, train_loss_epoch=1.89e+8]Epoch 71/300:  24%|██▎       | 71/300 [00:27<01:26,  2.64it/s, v_num=1, train_loss_step=1.73e+8, train_loss_epoch=1.89e+8]Epoch 71/300:  24%|██▎       | 71/300 [00:27<01:26,  2.64it/s, v_num=1, train_loss_step=1.77e+8, train_loss_epoch=1.83e+8]Epoch 72/300:  24%|██▎       
| 71/300 [00:27<01:26,  2.64it/s, v_num=1, train_loss_step=1.77e+8, train_loss_epoch=1.83e+8]Epoch 72/300:  24%|██▍       | 72/300 [00:27<01:26,  2.64it/s, v_num=1, train_loss_step=1.77e+8, train_loss_epoch=1.83e+8]Epoch 72/300:  24%|██▍       | 72/300 [00:27<01:26,  2.64it/s, v_num=1, train_loss_step=1.61e+8, train_loss_epoch=1.78e+8]Epoch 73/300:  24%|██▍       | 72/300 [00:27<01:26,  2.64it/s, v_num=1, train_loss_step=1.61e+8, train_loss_epoch=1.78e+8]Epoch 73/300:  24%|██▍       | 73/300 [00:28<01:27,  2.60it/s, v_num=1, train_loss_step=1.61e+8, train_loss_epoch=1.78e+8]Epoch 73/300:  24%|██▍       | 73/300 [00:28<01:27,  2.60it/s, v_num=1, train_loss_step=1.55e+8, train_loss_epoch=1.73e+8]Epoch 74/300:  24%|██▍       | 73/300 [00:28<01:27,  2.60it/s, v_num=1, train_loss_step=1.55e+8, train_loss_epoch=1.73e+8]Epoch 74/300:  25%|██▍       | 74/300 [00:28<01:27,  2.58it/s, v_num=1, train_loss_step=1.55e+8, train_loss_epoch=1.73e+8]Epoch 74/300:  25%|██▍       | 74/300 [00:28<01:27,  2.58it/s, v_num=1, train_loss_step=1.63e+8, train_loss_epoch=1.68e+8]Epoch 75/300:  25%|██▍       | 74/300 [00:28<01:27,  2.58it/s, v_num=1, train_loss_step=1.63e+8, train_loss_epoch=1.68e+8]Epoch 75/300:  25%|██▌       | 75/300 [00:29<01:26,  2.60it/s, v_num=1, train_loss_step=1.63e+8, train_loss_epoch=1.68e+8]Epoch 75/300:  25%|██▌       | 75/300 [00:29<01:26,  2.60it/s, v_num=1, train_loss_step=1.53e+8, train_loss_epoch=1.64e+8]Epoch 76/300:  25%|██▌       | 75/300 [00:29<01:26,  2.60it/s, v_num=1, train_loss_step=1.53e+8, train_loss_epoch=1.64e+8]Epoch 76/300:  25%|██▌       | 76/300 [00:29<01:25,  2.61it/s, v_num=1, train_loss_step=1.53e+8, train_loss_epoch=1.64e+8]Epoch 76/300:  25%|██▌       | 76/300 [00:29<01:25,  2.61it/s, v_num=1, train_loss_step=1.49e+8, train_loss_epoch=1.59e+8]Epoch 77/300:  25%|██▌       | 76/300 [00:29<01:25,  2.61it/s, v_num=1, train_loss_step=1.49e+8, train_loss_epoch=1.59e+8]Epoch 77/300:  26%|██▌       | 77/300 [00:29<01:25,  2.61it/s, v_num=1, 
train_loss_step=1.49e+8, train_loss_epoch=1.59e+8]Epoch 77/300:  26%|██▌       | 77/300 [00:29<01:25,  2.61it/s, v_num=1, train_loss_step=1.44e+8, train_loss_epoch=1.55e+8]Epoch 78/300:  26%|██▌       | 77/300 [00:29<01:25,  2.61it/s, v_num=1, train_loss_step=1.44e+8, train_loss_epoch=1.55e+8]Epoch 78/300:  26%|██▌       | 78/300 [00:30<01:24,  2.63it/s, v_num=1, train_loss_step=1.44e+8, train_loss_epoch=1.55e+8]Epoch 78/300:  26%|██▌       | 78/300 [00:30<01:24,  2.63it/s, v_num=1, train_loss_step=1.45e+8, train_loss_epoch=1.51e+8]Epoch 79/300:  26%|██▌       | 78/300 [00:30<01:24,  2.63it/s, v_num=1, train_loss_step=1.45e+8, train_loss_epoch=1.51e+8]Epoch 79/300:  26%|██▋       | 79/300 [00:30<01:23,  2.63it/s, v_num=1, train_loss_step=1.45e+8, train_loss_epoch=1.51e+8]Epoch 79/300:  26%|██▋       | 79/300 [00:30<01:23,  2.63it/s, v_num=1, train_loss_step=1.38e+8, train_loss_epoch=1.47e+8]Epoch 80/300:  26%|██▋       | 79/300 [00:30<01:23,  2.63it/s, v_num=1, train_loss_step=1.38e+8, train_loss_epoch=1.47e+8]Epoch 80/300:  27%|██▋       | 80/300 [00:30<01:23,  2.63it/s, v_num=1, train_loss_step=1.38e+8, train_loss_epoch=1.47e+8]Epoch 80/300:  27%|██▋       | 80/300 [00:30<01:23,  2.63it/s, v_num=1, train_loss_step=1.37e+8, train_loss_epoch=1.43e+8]Epoch 81/300:  27%|██▋       | 80/300 [00:30<01:23,  2.63it/s, v_num=1, train_loss_step=1.37e+8, train_loss_epoch=1.43e+8]Epoch 81/300:  27%|██▋       | 81/300 [00:31<01:24,  2.61it/s, v_num=1, train_loss_step=1.37e+8, train_loss_epoch=1.43e+8]Epoch 81/300:  27%|██▋       | 81/300 [00:31<01:24,  2.61it/s, v_num=1, train_loss_step=1.26e+8, train_loss_epoch=1.4e+8] Epoch 82/300:  27%|██▋       | 81/300 [00:31<01:24,  2.61it/s, v_num=1, train_loss_step=1.26e+8, train_loss_epoch=1.4e+8]Epoch 82/300:  27%|██▋       | 82/300 [00:31<01:23,  2.62it/s, v_num=1, train_loss_step=1.26e+8, train_loss_epoch=1.4e+8]Epoch 82/300:  27%|██▋       | 82/300 [00:31<01:23,  2.62it/s, v_num=1, train_loss_step=1.33e+8, 
train_loss_epoch=1.36e+8]Epoch 83/300:  27%|██▋       | 82/300 [00:31<01:23,  2.62it/s, v_num=1, train_loss_step=1.33e+8, train_loss_epoch=1.36e+8]Epoch 83/300:  28%|██▊       | 83/300 [00:32<01:22,  2.63it/s, v_num=1, train_loss_step=1.33e+8, train_loss_epoch=1.36e+8]Epoch 83/300:  28%|██▊       | 83/300 [00:32<01:22,  2.63it/s, v_num=1, train_loss_step=1.28e+8, train_loss_epoch=1.33e+8]Epoch 84/300:  28%|██▊       | 83/300 [00:32<01:22,  2.63it/s, v_num=1, train_loss_step=1.28e+8, train_loss_epoch=1.33e+8]Epoch 84/300:  28%|██▊       | 84/300 [00:32<01:22,  2.63it/s, v_num=1, train_loss_step=1.28e+8, train_loss_epoch=1.33e+8]Epoch 84/300:  28%|██▊       | 84/300 [00:32<01:22,  2.63it/s, v_num=1, train_loss_step=1.2e+8, train_loss_epoch=1.29e+8] Epoch 85/300:  28%|██▊       | 84/300 [00:32<01:22,  2.63it/s, v_num=1, train_loss_step=1.2e+8, train_loss_epoch=1.29e+8]Epoch 85/300:  28%|██▊       | 85/300 [00:32<01:21,  2.63it/s, v_num=1, train_loss_step=1.2e+8, train_loss_epoch=1.29e+8]Epoch 85/300:  28%|██▊       | 85/300 [00:32<01:21,  2.63it/s, v_num=1, train_loss_step=1.16e+8, train_loss_epoch=1.26e+8]Epoch 86/300:  28%|██▊       | 85/300 [00:32<01:21,  2.63it/s, v_num=1, train_loss_step=1.16e+8, train_loss_epoch=1.26e+8]Epoch 86/300:  29%|██▊       | 86/300 [00:33<01:21,  2.64it/s, v_num=1, train_loss_step=1.16e+8, train_loss_epoch=1.26e+8]Epoch 86/300:  29%|██▊       | 86/300 [00:33<01:21,  2.64it/s, v_num=1, train_loss_step=1.14e+8, train_loss_epoch=1.23e+8]Epoch 87/300:  29%|██▊       | 86/300 [00:33<01:21,  2.64it/s, v_num=1, train_loss_step=1.14e+8, train_loss_epoch=1.23e+8]Epoch 87/300:  29%|██▉       | 87/300 [00:33<01:22,  2.60it/s, v_num=1, train_loss_step=1.14e+8, train_loss_epoch=1.23e+8]Epoch 87/300:  29%|██▉       | 87/300 [00:33<01:22,  2.60it/s, v_num=1, train_loss_step=1.18e+8, train_loss_epoch=1.2e+8] Epoch 88/300:  29%|██▉       | 87/300 [00:33<01:22,  2.60it/s, v_num=1, train_loss_step=1.18e+8, train_loss_epoch=1.2e+8]Epoch 88/300:  29%|██▉    
   | 88/300 [00:33<01:22,  2.58it/s, v_num=1, train_loss_step=1.18e+8, train_loss_epoch=1.2e+8]Epoch 88/300:  29%|██▉       | 88/300 [00:33<01:22,  2.58it/s, v_num=1, train_loss_step=1.07e+8, train_loss_epoch=1.17e+8]Epoch 89/300:  29%|██▉       | 88/300 [00:33<01:22,  2.58it/s, v_num=1, train_loss_step=1.07e+8, train_loss_epoch=1.17e+8]Epoch 89/300:  30%|██▉       | 89/300 [00:34<01:21,  2.59it/s, v_num=1, train_loss_step=1.07e+8, train_loss_epoch=1.17e+8]Epoch 89/300:  30%|██▉       | 89/300 [00:34<01:21,  2.59it/s, v_num=1, train_loss_step=1.11e+8, train_loss_epoch=1.14e+8]Epoch 90/300:  30%|██▉       | 89/300 [00:34<01:21,  2.59it/s, v_num=1, train_loss_step=1.11e+8, train_loss_epoch=1.14e+8]Epoch 90/300:  30%|███       | 90/300 [00:34<01:20,  2.61it/s, v_num=1, train_loss_step=1.11e+8, train_loss_epoch=1.14e+8]Epoch 90/300:  30%|███       | 90/300 [00:34<01:20,  2.61it/s, v_num=1, train_loss_step=1.06e+8, train_loss_epoch=1.12e+8]Epoch 91/300:  30%|███       | 90/300 [00:34<01:20,  2.61it/s, v_num=1, train_loss_step=1.06e+8, train_loss_epoch=1.12e+8]Epoch 91/300:  30%|███       | 91/300 [00:35<01:19,  2.62it/s, v_num=1, train_loss_step=1.06e+8, train_loss_epoch=1.12e+8]Epoch 91/300:  30%|███       | 91/300 [00:35<01:19,  2.62it/s, v_num=1, train_loss_step=1.04e+8, train_loss_epoch=1.09e+8]Epoch 92/300:  30%|███       | 91/300 [00:35<01:19,  2.62it/s, v_num=1, train_loss_step=1.04e+8, train_loss_epoch=1.09e+8]Epoch 92/300:  31%|███       | 92/300 [00:35<01:19,  2.63it/s, v_num=1, train_loss_step=1.04e+8, train_loss_epoch=1.09e+8]Epoch 92/300:  31%|███       | 92/300 [00:35<01:19,  2.63it/s, v_num=1, train_loss_step=1.02e+8, train_loss_epoch=1.07e+8]Epoch 93/300:  31%|███       | 92/300 [00:35<01:19,  2.63it/s, v_num=1, train_loss_step=1.02e+8, train_loss_epoch=1.07e+8]Epoch 93/300:  31%|███       | 93/300 [00:35<01:18,  2.63it/s, v_num=1, train_loss_step=1.02e+8, train_loss_epoch=1.07e+8]Epoch 93/300:  31%|███       | 93/300 [00:35<01:18,  2.63it/s, v_num=1, 
train_loss_step=1e+8, train_loss_epoch=1.04e+8]   Epoch 94/300:  31%|███       | 93/300 [00:35<01:18,  2.63it/s, v_num=1, train_loss_step=1e+8, train_loss_epoch=1.04e+8]Epoch 94/300:  31%|███▏      | 94/300 [00:36<01:18,  2.62it/s, v_num=1, train_loss_step=1e+8, train_loss_epoch=1.04e+8]Epoch 94/300:  31%|███▏      | 94/300 [00:36<01:18,  2.62it/s, v_num=1, train_loss_step=9.63e+7, train_loss_epoch=1.02e+8]Epoch 95/300:  31%|███▏      | 94/300 [00:36<01:18,  2.62it/s, v_num=1, train_loss_step=9.63e+7, train_loss_epoch=1.02e+8]Epoch 95/300:  32%|███▏      | 95/300 [00:36<01:17,  2.63it/s, v_num=1, train_loss_step=9.63e+7, train_loss_epoch=1.02e+8]Epoch 95/300:  32%|███▏      | 95/300 [00:36<01:17,  2.63it/s, v_num=1, train_loss_step=9.87e+7, train_loss_epoch=9.97e+7]Epoch 96/300:  32%|███▏      | 95/300 [00:36<01:17,  2.63it/s, v_num=1, train_loss_step=9.87e+7, train_loss_epoch=9.97e+7]Epoch 96/300:  32%|███▏      | 96/300 [00:37<01:17,  2.63it/s, v_num=1, train_loss_step=9.87e+7, train_loss_epoch=9.97e+7]Epoch 96/300:  32%|███▏      | 96/300 [00:37<01:17,  2.63it/s, v_num=1, train_loss_step=9.37e+7, train_loss_epoch=9.75e+7]Epoch 97/300:  32%|███▏      | 96/300 [00:37<01:17,  2.63it/s, v_num=1, train_loss_step=9.37e+7, train_loss_epoch=9.75e+7]Epoch 97/300:  32%|███▏      | 97/300 [00:37<01:17,  2.62it/s, v_num=1, train_loss_step=9.37e+7, train_loss_epoch=9.75e+7]Epoch 97/300:  32%|███▏      | 97/300 [00:37<01:17,  2.62it/s, v_num=1, train_loss_step=9.35e+7, train_loss_epoch=9.54e+7]Epoch 98/300:  32%|███▏      | 97/300 [00:37<01:17,  2.62it/s, v_num=1, train_loss_step=9.35e+7, train_loss_epoch=9.54e+7]Epoch 98/300:  33%|███▎      | 98/300 [00:37<01:17,  2.60it/s, v_num=1, train_loss_step=9.35e+7, train_loss_epoch=9.54e+7]Epoch 98/300:  33%|███▎      | 98/300 [00:37<01:17,  2.60it/s, v_num=1, train_loss_step=8.84e+7, train_loss_epoch=9.33e+7]Epoch 99/300:  33%|███▎      | 98/300 [00:37<01:17,  2.60it/s, v_num=1, train_loss_step=8.84e+7, 
train_loss_epoch=9.33e+7]Epoch 99/300:  33%|███▎      | 99/300 [00:38<01:17,  2.60it/s, v_num=1, train_loss_step=8.84e+7, train_loss_epoch=9.33e+7]Epoch 99/300:  33%|███▎      | 99/300 [00:38<01:17,  2.60it/s, v_num=1, train_loss_step=8.52e+7, train_loss_epoch=9.13e+7]Epoch 100/300:  33%|███▎      | 99/300 [00:38<01:17,  2.60it/s, v_num=1, train_loss_step=8.52e+7, train_loss_epoch=9.13e+7]Epoch 100/300:  33%|███▎      | 100/300 [00:38<01:17,  2.59it/s, v_num=1, train_loss_step=8.52e+7, train_loss_epoch=9.13e+7]Epoch 100/300:  33%|███▎      | 100/300 [00:38<01:17,  2.59it/s, v_num=1, train_loss_step=8.35e+7, train_loss_epoch=8.94e+7]Epoch 101/300:  33%|███▎      | 100/300 [00:38<01:17,  2.59it/s, v_num=1, train_loss_step=8.35e+7, train_loss_epoch=8.94e+7]Epoch 101/300:  34%|███▎      | 101/300 [00:38<01:16,  2.59it/s, v_num=1, train_loss_step=8.35e+7, train_loss_epoch=8.94e+7]Epoch 101/300:  34%|███▎      | 101/300 [00:38<01:16,  2.59it/s, v_num=1, train_loss_step=8.16e+7, train_loss_epoch=8.75e+7]Epoch 102/300:  34%|███▎      | 101/300 [00:38<01:16,  2.59it/s, v_num=1, train_loss_step=8.16e+7, train_loss_epoch=8.75e+7]Epoch 102/300:  34%|███▍      | 102/300 [00:39<01:16,  2.58it/s, v_num=1, train_loss_step=8.16e+7, train_loss_epoch=8.75e+7]Epoch 102/300:  34%|███▍      | 102/300 [00:39<01:16,  2.58it/s, v_num=1, train_loss_step=8.66e+7, train_loss_epoch=8.57e+7]Epoch 103/300:  34%|███▍      | 102/300 [00:39<01:16,  2.58it/s, v_num=1, train_loss_step=8.66e+7, train_loss_epoch=8.57e+7]Epoch 103/300:  34%|███▍      | 103/300 [00:39<01:16,  2.59it/s, v_num=1, train_loss_step=8.66e+7, train_loss_epoch=8.57e+7]Epoch 103/300:  34%|███▍      | 103/300 [00:39<01:16,  2.59it/s, v_num=1, train_loss_step=8.12e+7, train_loss_epoch=8.39e+7]Epoch 104/300:  34%|███▍      | 103/300 [00:39<01:16,  2.59it/s, v_num=1, train_loss_step=8.12e+7, train_loss_epoch=8.39e+7]Epoch 104/300:  35%|███▍      | 104/300 [00:40<01:15,  2.60it/s, v_num=1, train_loss_step=8.12e+7, 
train_loss_epoch=8.39e+7]Epoch 104/300:  35%|███▍      | 104/300 [00:40<01:15,  2.60it/s, v_num=1, train_loss_step=7.34e+7, train_loss_epoch=8.22e+7]Epoch 105/300:  35%|███▍      | 104/300 [00:40<01:15,  2.60it/s, v_num=1, train_loss_step=7.34e+7, train_loss_epoch=8.22e+7]Epoch 105/300:  35%|███▌      | 105/300 [00:40<01:14,  2.62it/s, v_num=1, train_loss_step=7.34e+7, train_loss_epoch=8.22e+7]Epoch 105/300:  35%|███▌      | 105/300 [00:40<01:14,  2.62it/s, v_num=1, train_loss_step=7.59e+7, train_loss_epoch=8.06e+7]Epoch 106/300:  35%|███▌      | 105/300 [00:40<01:14,  2.62it/s, v_num=1, train_loss_step=7.59e+7, train_loss_epoch=8.06e+7]Epoch 106/300:  35%|███▌      | 106/300 [00:40<01:14,  2.61it/s, v_num=1, train_loss_step=7.59e+7, train_loss_epoch=8.06e+7]Epoch 106/300:  35%|███▌      | 106/300 [00:40<01:14,  2.61it/s, v_num=1, train_loss_step=7.41e+7, train_loss_epoch=7.89e+7]Epoch 107/300:  35%|███▌      | 106/300 [00:40<01:14,  2.61it/s, v_num=1, train_loss_step=7.41e+7, train_loss_epoch=7.89e+7]Epoch 107/300:  36%|███▌      | 107/300 [00:41<01:14,  2.61it/s, v_num=1, train_loss_step=7.41e+7, train_loss_epoch=7.89e+7]Epoch 107/300:  36%|███▌      | 107/300 [00:41<01:14,  2.61it/s, v_num=1, train_loss_step=7.73e+7, train_loss_epoch=7.74e+7]Epoch 108/300:  36%|███▌      | 107/300 [00:41<01:14,  2.61it/s, v_num=1, train_loss_step=7.73e+7, train_loss_epoch=7.74e+7]Epoch 108/300:  36%|███▌      | 108/300 [00:41<01:13,  2.63it/s, v_num=1, train_loss_step=7.73e+7, train_loss_epoch=7.74e+7]Epoch 108/300:  36%|███▌      | 108/300 [00:41<01:13,  2.63it/s, v_num=1, train_loss_step=6.82e+7, train_loss_epoch=7.59e+7]Epoch 109/300:  36%|███▌      | 108/300 [00:41<01:13,  2.63it/s, v_num=1, train_loss_step=6.82e+7, train_loss_epoch=7.59e+7]Epoch 109/300:  36%|███▋      | 109/300 [00:42<01:12,  2.63it/s, v_num=1, train_loss_step=6.82e+7, train_loss_epoch=7.59e+7]Epoch 109/300:  36%|███▋      | 109/300 [00:42<01:12,  2.63it/s, v_num=1, train_loss_step=6.67e+7, 
train_loss_epoch=7.44e+7]Epoch 110/300:  36%|███▋      | 109/300 [00:42<01:12,  2.63it/s, v_num=1, train_loss_step=6.67e+7, train_loss_epoch=7.44e+7]Epoch 110/300:  37%|███▋      | 110/300 [00:42<01:12,  2.63it/s, v_num=1, train_loss_step=6.67e+7, train_loss_epoch=7.44e+7]Epoch 110/300:  37%|███▋      | 110/300 [00:42<01:12,  2.63it/s, v_num=1, train_loss_step=6.9e+7, train_loss_epoch=7.29e+7] Epoch 111/300:  37%|███▋      | 110/300 [00:42<01:12,  2.63it/s, v_num=1, train_loss_step=6.9e+7, train_loss_epoch=7.29e+7]Epoch 111/300:  37%|███▋      | 111/300 [00:42<01:11,  2.63it/s, v_num=1, train_loss_step=6.9e+7, train_loss_epoch=7.29e+7]Epoch 111/300:  37%|███▋      | 111/300 [00:42<01:11,  2.63it/s, v_num=1, train_loss_step=7.17e+7, train_loss_epoch=7.15e+7]Epoch 112/300:  37%|███▋      | 111/300 [00:42<01:11,  2.63it/s, v_num=1, train_loss_step=7.17e+7, train_loss_epoch=7.15e+7]Epoch 112/300:  37%|███▋      | 112/300 [00:43<01:11,  2.63it/s, v_num=1, train_loss_step=7.17e+7, train_loss_epoch=7.15e+7]Epoch 112/300:  37%|███▋      | 112/300 [00:43<01:11,  2.63it/s, v_num=1, train_loss_step=6.48e+7, train_loss_epoch=7.02e+7]Epoch 113/300:  37%|███▋      | 112/300 [00:43<01:11,  2.63it/s, v_num=1, train_loss_step=6.48e+7, train_loss_epoch=7.02e+7]Epoch 113/300:  38%|███▊      | 113/300 [00:43<01:10,  2.64it/s, v_num=1, train_loss_step=6.48e+7, train_loss_epoch=7.02e+7]Epoch 113/300:  38%|███▊      | 113/300 [00:43<01:10,  2.64it/s, v_num=1, train_loss_step=6.53e+7, train_loss_epoch=6.89e+7]Epoch 114/300:  38%|███▊      | 113/300 [00:43<01:10,  2.64it/s, v_num=1, train_loss_step=6.53e+7, train_loss_epoch=6.89e+7]Epoch 114/300:  38%|███▊      | 114/300 [00:43<01:11,  2.60it/s, v_num=1, train_loss_step=6.53e+7, train_loss_epoch=6.89e+7]Epoch 114/300:  38%|███▊      | 114/300 [00:43<01:11,  2.60it/s, v_num=1, train_loss_step=6.26e+7, train_loss_epoch=6.76e+7]Epoch 115/300:  38%|███▊      | 114/300 [00:43<01:11,  2.60it/s, v_num=1, train_loss_step=6.26e+7, 
train_loss_epoch=6.76e+7]Epoch 115/300:  38%|███▊      | 115/300 [00:44<01:11,  2.58it/s, v_num=1, train_loss_step=6.26e+7, train_loss_epoch=6.76e+7]Epoch 115/300:  38%|███▊      | 115/300 [00:44<01:11,  2.58it/s, v_num=1, train_loss_step=6.83e+7, train_loss_epoch=6.63e+7]Epoch 116/300:  38%|███▊      | 115/300 [00:44<01:11,  2.58it/s, v_num=1, train_loss_step=6.83e+7, train_loss_epoch=6.63e+7]Epoch 116/300:  39%|███▊      | 116/300 [00:44<01:11,  2.59it/s, v_num=1, train_loss_step=6.83e+7, train_loss_epoch=6.63e+7]Epoch 116/300:  39%|███▊      | 116/300 [00:44<01:11,  2.59it/s, v_num=1, train_loss_step=6.03e+7, train_loss_epoch=6.51e+7]Epoch 117/300:  39%|███▊      | 116/300 [00:44<01:11,  2.59it/s, v_num=1, train_loss_step=6.03e+7, train_loss_epoch=6.51e+7]Epoch 117/300:  39%|███▉      | 117/300 [00:45<01:10,  2.61it/s, v_num=1, train_loss_step=6.03e+7, train_loss_epoch=6.51e+7]Epoch 117/300:  39%|███▉      | 117/300 [00:45<01:10,  2.61it/s, v_num=1, train_loss_step=6.1e+7, train_loss_epoch=6.4e+7]  Epoch 118/300:  39%|███▉      | 117/300 [00:45<01:10,  2.61it/s, v_num=1, train_loss_step=6.1e+7, train_loss_epoch=6.4e+7]Epoch 118/300:  39%|███▉      | 118/300 [00:45<01:09,  2.61it/s, v_num=1, train_loss_step=6.1e+7, train_loss_epoch=6.4e+7]Epoch 118/300:  39%|███▉      | 118/300 [00:45<01:09,  2.61it/s, v_num=1, train_loss_step=6.25e+7, train_loss_epoch=6.28e+7]Epoch 119/300:  39%|███▉      | 118/300 [00:45<01:09,  2.61it/s, v_num=1, train_loss_step=6.25e+7, train_loss_epoch=6.28e+7]Epoch 119/300:  40%|███▉      | 119/300 [00:45<01:09,  2.62it/s, v_num=1, train_loss_step=6.25e+7, train_loss_epoch=6.28e+7]Epoch 119/300:  40%|███▉      | 119/300 [00:45<01:09,  2.62it/s, v_num=1, train_loss_step=5.68e+7, train_loss_epoch=6.17e+7]Epoch 120/300:  40%|███▉      | 119/300 [00:45<01:09,  2.62it/s, v_num=1, train_loss_step=5.68e+7, train_loss_epoch=6.17e+7]Epoch 120/300:  40%|████      | 120/300 [00:46<01:08,  2.63it/s, v_num=1, train_loss_step=5.68e+7, 
train_loss_epoch=6.17e+7]Epoch 120/300:  40%|████      | 120/300 [00:46<01:08,  2.63it/s, v_num=1, train_loss_step=5.82e+7, train_loss_epoch=6.06e+7]Epoch 121/300:  40%|████      | 120/300 [00:46<01:08,  2.63it/s, v_num=1, train_loss_step=5.82e+7, train_loss_epoch=6.06e+7]Epoch 121/300:  40%|████      | 121/300 [00:46<01:08,  2.63it/s, v_num=1, train_loss_step=5.82e+7, train_loss_epoch=6.06e+7]Epoch 121/300:  40%|████      | 121/300 [00:46<01:08,  2.63it/s, v_num=1, train_loss_step=5.62e+7, train_loss_epoch=5.95e+7]Epoch 122/300:  40%|████      | 121/300 [00:46<01:08,  2.63it/s, v_num=1, train_loss_step=5.62e+7, train_loss_epoch=5.95e+7]Epoch 122/300:  41%|████      | 122/300 [00:46<01:07,  2.64it/s, v_num=1, train_loss_step=5.62e+7, train_loss_epoch=5.95e+7]Epoch 122/300:  41%|████      | 122/300 [00:46<01:07,  2.64it/s, v_num=1, train_loss_step=5.38e+7, train_loss_epoch=5.85e+7]Epoch 123/300:  41%|████      | 122/300 [00:46<01:07,  2.64it/s, v_num=1, train_loss_step=5.38e+7, train_loss_epoch=5.85e+7]Epoch 123/300:  41%|████      | 123/300 [00:47<01:06,  2.65it/s, v_num=1, train_loss_step=5.38e+7, train_loss_epoch=5.85e+7]Epoch 123/300:  41%|████      | 123/300 [00:47<01:06,  2.65it/s, v_num=1, train_loss_step=5.36e+7, train_loss_epoch=5.75e+7]Epoch 124/300:  41%|████      | 123/300 [00:47<01:06,  2.65it/s, v_num=1, train_loss_step=5.36e+7, train_loss_epoch=5.75e+7]Epoch 124/300:  41%|████▏     | 124/300 [00:47<01:06,  2.64it/s, v_num=1, train_loss_step=5.36e+7, train_loss_epoch=5.75e+7]Epoch 124/300:  41%|████▏     | 124/300 [00:47<01:06,  2.64it/s, v_num=1, train_loss_step=5.41e+7, train_loss_epoch=5.65e+7]Epoch 125/300:  41%|████▏     | 124/300 [00:47<01:06,  2.64it/s, v_num=1, train_loss_step=5.41e+7, train_loss_epoch=5.65e+7]Epoch 125/300:  42%|████▏     | 125/300 [00:48<01:06,  2.64it/s, v_num=1, train_loss_step=5.41e+7, train_loss_epoch=5.65e+7]Epoch 125/300:  42%|████▏     | 125/300 [00:48<01:06,  2.64it/s, v_num=1, train_loss_step=4.93e+7, 
train_loss_epoch=5.56e+7]Epoch 126/300:  42%|████▏     | 125/300 [00:48<01:06,  2.64it/s, v_num=1, train_loss_step=4.93e+7, train_loss_epoch=5.56e+7]Epoch 126/300:  42%|████▏     | 126/300 [00:48<01:06,  2.64it/s, v_num=1, train_loss_step=4.93e+7, train_loss_epoch=5.56e+7]Epoch 126/300:  42%|████▏     | 126/300 [00:48<01:06,  2.64it/s, v_num=1, train_loss_step=5.14e+7, train_loss_epoch=5.46e+7]Epoch 127/300:  42%|████▏     | 126/300 [00:48<01:06,  2.64it/s, v_num=1, train_loss_step=5.14e+7, train_loss_epoch=5.46e+7]Epoch 127/300:  42%|████▏     | 127/300 [00:48<01:05,  2.64it/s, v_num=1, train_loss_step=5.14e+7, train_loss_epoch=5.46e+7]Epoch 127/300:  42%|████▏     | 127/300 [00:48<01:05,  2.64it/s, v_num=1, train_loss_step=4.99e+7, train_loss_epoch=5.37e+7]Epoch 128/300:  42%|████▏     | 127/300 [00:48<01:05,  2.64it/s, v_num=1, train_loss_step=4.99e+7, train_loss_epoch=5.37e+7]Epoch 128/300:  43%|████▎     | 128/300 [00:49<01:06,  2.58it/s, v_num=1, train_loss_step=4.99e+7, train_loss_epoch=5.37e+7]Epoch 128/300:  43%|████▎     | 128/300 [00:49<01:06,  2.58it/s, v_num=1, train_loss_step=4.84e+7, train_loss_epoch=5.29e+7]Epoch 129/300:  43%|████▎     | 128/300 [00:49<01:06,  2.58it/s, v_num=1, train_loss_step=4.84e+7, train_loss_epoch=5.29e+7]Epoch 129/300:  43%|████▎     | 129/300 [00:49<01:06,  2.56it/s, v_num=1, train_loss_step=4.84e+7, train_loss_epoch=5.29e+7]Epoch 129/300:  43%|████▎     | 129/300 [00:49<01:06,  2.56it/s, v_num=1, train_loss_step=4.81e+7, train_loss_epoch=5.2e+7] Epoch 130/300:  43%|████▎     | 129/300 [00:49<01:06,  2.56it/s, v_num=1, train_loss_step=4.81e+7, train_loss_epoch=5.2e+7]Epoch 130/300:  43%|████▎     | 130/300 [00:50<01:05,  2.58it/s, v_num=1, train_loss_step=4.81e+7, train_loss_epoch=5.2e+7]Epoch 130/300:  43%|████▎     | 130/300 [00:50<01:05,  2.58it/s, v_num=1, train_loss_step=4.76e+7, train_loss_epoch=5.12e+7]Epoch 131/300:  43%|████▎     | 130/300 [00:50<01:05,  2.58it/s, v_num=1, train_loss_step=4.76e+7, 
train_loss_epoch=5.12e+7]Epoch 131/300:  44%|████▎     | 131/300 [00:50<01:05,  2.59it/s, v_num=1, train_loss_step=4.76e+7, train_loss_epoch=5.12e+7]Epoch 131/300:  44%|████▎     | 131/300 [00:50<01:05,  2.59it/s, v_num=1, train_loss_step=4.47e+7, train_loss_epoch=5.03e+7]Epoch 132/300:  44%|████▎     | 131/300 [00:50<01:05,  2.59it/s, v_num=1, train_loss_step=4.47e+7, train_loss_epoch=5.03e+7]Epoch 132/300:  44%|████▍     | 132/300 [00:50<01:04,  2.62it/s, v_num=1, train_loss_step=4.47e+7, train_loss_epoch=5.03e+7]Epoch 132/300:  44%|████▍     | 132/300 [00:50<01:04,  2.62it/s, v_num=1, train_loss_step=4.68e+7, train_loss_epoch=4.95e+7]Epoch 133/300:  44%|████▍     | 132/300 [00:50<01:04,  2.62it/s, v_num=1, train_loss_step=4.68e+7, train_loss_epoch=4.95e+7]Epoch 133/300:  44%|████▍     | 133/300 [00:51<01:04,  2.61it/s, v_num=1, train_loss_step=4.68e+7, train_loss_epoch=4.95e+7]Epoch 133/300:  44%|████▍     | 133/300 [00:51<01:04,  2.61it/s, v_num=1, train_loss_step=4.57e+7, train_loss_epoch=4.88e+7]Epoch 134/300:  44%|████▍     | 133/300 [00:51<01:04,  2.61it/s, v_num=1, train_loss_step=4.57e+7, train_loss_epoch=4.88e+7]Epoch 134/300:  45%|████▍     | 134/300 [00:51<01:03,  2.62it/s, v_num=1, train_loss_step=4.57e+7, train_loss_epoch=4.88e+7]Epoch 134/300:  45%|████▍     | 134/300 [00:51<01:03,  2.62it/s, v_num=1, train_loss_step=4.53e+7, train_loss_epoch=4.8e+7] Epoch 135/300:  45%|████▍     | 134/300 [00:51<01:03,  2.62it/s, v_num=1, train_loss_step=4.53e+7, train_loss_epoch=4.8e+7]Epoch 135/300:  45%|████▌     | 135/300 [00:51<01:02,  2.63it/s, v_num=1, train_loss_step=4.53e+7, train_loss_epoch=4.8e+7]Epoch 135/300:  45%|████▌     | 135/300 [00:51<01:02,  2.63it/s, v_num=1, train_loss_step=4.28e+7, train_loss_epoch=4.73e+7]Epoch 136/300:  45%|████▌     | 135/300 [00:51<01:02,  2.63it/s, v_num=1, train_loss_step=4.28e+7, train_loss_epoch=4.73e+7]Epoch 136/300:  45%|████▌     | 136/300 [00:52<01:02,  2.62it/s, v_num=1, train_loss_step=4.28e+7, 
train_loss_epoch=4.73e+7]Epoch 136/300:  45%|████▌     | 136/300 [00:52<01:02,  2.62it/s, v_num=1, train_loss_step=4.49e+7, train_loss_epoch=4.65e+7]Epoch 137/300:  45%|████▌     | 136/300 [00:52<01:02,  2.62it/s, v_num=1, train_loss_step=4.49e+7, train_loss_epoch=4.65e+7]Epoch 137/300:  46%|████▌     | 137/300 [00:52<01:02,  2.63it/s, v_num=1, train_loss_step=4.49e+7, train_loss_epoch=4.65e+7]Epoch 137/300:  46%|████▌     | 137/300 [00:52<01:02,  2.63it/s, v_num=1, train_loss_step=4.45e+7, train_loss_epoch=4.58e+7]Epoch 138/300:  46%|████▌     | 137/300 [00:52<01:02,  2.63it/s, v_num=1, train_loss_step=4.45e+7, train_loss_epoch=4.58e+7]Epoch 138/300:  46%|████▌     | 138/300 [00:53<01:01,  2.63it/s, v_num=1, train_loss_step=4.45e+7, train_loss_epoch=4.58e+7]Epoch 138/300:  46%|████▌     | 138/300 [00:53<01:01,  2.63it/s, v_num=1, train_loss_step=4.45e+7, train_loss_epoch=4.51e+7]Epoch 139/300:  46%|████▌     | 138/300 [00:53<01:01,  2.63it/s, v_num=1, train_loss_step=4.45e+7, train_loss_epoch=4.51e+7]Epoch 139/300:  46%|████▋     | 139/300 [00:53<01:01,  2.62it/s, v_num=1, train_loss_step=4.45e+7, train_loss_epoch=4.51e+7]Epoch 139/300:  46%|████▋     | 139/300 [00:53<01:01,  2.62it/s, v_num=1, train_loss_step=4.22e+7, train_loss_epoch=4.45e+7]Epoch 140/300:  46%|████▋     | 139/300 [00:53<01:01,  2.62it/s, v_num=1, train_loss_step=4.22e+7, train_loss_epoch=4.45e+7]Epoch 140/300:  47%|████▋     | 140/300 [00:53<01:01,  2.62it/s, v_num=1, train_loss_step=4.22e+7, train_loss_epoch=4.45e+7]Epoch 140/300:  47%|████▋     | 140/300 [00:53<01:01,  2.62it/s, v_num=1, train_loss_step=4.17e+7, train_loss_epoch=4.38e+7]Epoch 141/300:  47%|████▋     | 140/300 [00:53<01:01,  2.62it/s, v_num=1, train_loss_step=4.17e+7, train_loss_epoch=4.38e+7]Epoch 141/300:  47%|████▋     | 141/300 [00:54<01:00,  2.64it/s, v_num=1, train_loss_step=4.17e+7, train_loss_epoch=4.38e+7]Epoch 141/300:  47%|████▋     | 141/300 [00:54<01:00,  2.64it/s, v_num=1, train_loss_step=3.78e+7, 
train_loss_epoch=4.32e+7]Epoch 142/300:  47%|████▋     | 141/300 [00:54<01:00,  2.64it/s, v_num=1, train_loss_step=3.78e+7, train_loss_epoch=4.32e+7]Epoch 142/300:  47%|████▋     | 142/300 [00:54<01:00,  2.60it/s, v_num=1, train_loss_step=3.78e+7, train_loss_epoch=4.32e+7]Epoch 142/300:  47%|████▋     | 142/300 [00:54<01:00,  2.60it/s, v_num=1, train_loss_step=3.91e+7, train_loss_epoch=4.25e+7]Epoch 143/300:  47%|████▋     | 142/300 [00:54<01:00,  2.60it/s, v_num=1, train_loss_step=3.91e+7, train_loss_epoch=4.25e+7]Epoch 143/300:  48%|████▊     | 143/300 [00:55<01:00,  2.58it/s, v_num=1, train_loss_step=3.91e+7, train_loss_epoch=4.25e+7]Epoch 143/300:  48%|████▊     | 143/300 [00:55<01:00,  2.58it/s, v_num=1, train_loss_step=4.08e+7, train_loss_epoch=4.19e+7]Epoch 144/300:  48%|████▊     | 143/300 [00:55<01:00,  2.58it/s, v_num=1, train_loss_step=4.08e+7, train_loss_epoch=4.19e+7]Epoch 144/300:  48%|████▊     | 144/300 [00:55<01:00,  2.59it/s, v_num=1, train_loss_step=4.08e+7, train_loss_epoch=4.19e+7]Epoch 144/300:  48%|████▊     | 144/300 [00:55<01:00,  2.59it/s, v_num=1, train_loss_step=3.81e+7, train_loss_epoch=4.13e+7]Epoch 145/300:  48%|████▊     | 144/300 [00:55<01:00,  2.59it/s, v_num=1, train_loss_step=3.81e+7, train_loss_epoch=4.13e+7]Epoch 145/300:  48%|████▊     | 145/300 [00:55<00:59,  2.61it/s, v_num=1, train_loss_step=3.81e+7, train_loss_epoch=4.13e+7]Epoch 145/300:  48%|████▊     | 145/300 [00:55<00:59,  2.61it/s, v_num=1, train_loss_step=3.9e+7, train_loss_epoch=4.08e+7] Epoch 146/300:  48%|████▊     | 145/300 [00:55<00:59,  2.61it/s, v_num=1, train_loss_step=3.9e+7, train_loss_epoch=4.08e+7]Epoch 146/300:  49%|████▊     | 146/300 [00:56<00:58,  2.61it/s, v_num=1, train_loss_step=3.9e+7, train_loss_epoch=4.08e+7]Epoch 146/300:  49%|████▊     | 146/300 [00:56<00:58,  2.61it/s, v_num=1, train_loss_step=3.84e+7, train_loss_epoch=4.02e+7]Epoch 147/300:  49%|████▊     | 146/300 [00:56<00:58,  2.61it/s, v_num=1, train_loss_step=3.84e+7, 
train_loss_epoch=4.02e+7]Epoch 147/300:  49%|████▉     | 147/300 [00:56<00:58,  2.62it/s, v_num=1, train_loss_step=3.84e+7, train_loss_epoch=4.02e+7]Epoch 147/300:  49%|████▉     | 147/300 [00:56<00:58,  2.62it/s, v_num=1, train_loss_step=3.81e+7, train_loss_epoch=3.96e+7]Epoch 148/300:  49%|████▉     | 147/300 [00:56<00:58,  2.62it/s, v_num=1, train_loss_step=3.81e+7, train_loss_epoch=3.96e+7]Epoch 148/300:  49%|████▉     | 148/300 [00:56<00:57,  2.63it/s, v_num=1, train_loss_step=3.81e+7, train_loss_epoch=3.96e+7]Epoch 148/300:  49%|████▉     | 148/300 [00:56<00:57,  2.63it/s, v_num=1, train_loss_step=3.74e+7, train_loss_epoch=3.91e+7]Epoch 149/300:  49%|████▉     | 148/300 [00:56<00:57,  2.63it/s, v_num=1, train_loss_step=3.74e+7, train_loss_epoch=3.91e+7]Epoch 149/300:  50%|████▉     | 149/300 [00:57<00:57,  2.63it/s, v_num=1, train_loss_step=3.74e+7, train_loss_epoch=3.91e+7]Epoch 149/300:  50%|████▉     | 149/300 [00:57<00:57,  2.63it/s, v_num=1, train_loss_step=3.61e+7, train_loss_epoch=3.85e+7]Epoch 150/300:  50%|████▉     | 149/300 [00:57<00:57,  2.63it/s, v_num=1, train_loss_step=3.61e+7, train_loss_epoch=3.85e+7]Epoch 150/300:  50%|█████     | 150/300 [00:57<00:57,  2.63it/s, v_num=1, train_loss_step=3.61e+7, train_loss_epoch=3.85e+7]Epoch 150/300:  50%|█████     | 150/300 [00:57<00:57,  2.63it/s, v_num=1, train_loss_step=3.59e+7, train_loss_epoch=3.8e+7] Epoch 151/300:  50%|█████     | 150/300 [00:57<00:57,  2.63it/s, v_num=1, train_loss_step=3.59e+7, train_loss_epoch=3.8e+7]Epoch 151/300:  50%|█████     | 151/300 [00:58<00:56,  2.63it/s, v_num=1, train_loss_step=3.59e+7, train_loss_epoch=3.8e+7]Epoch 151/300:  50%|█████     | 151/300 [00:58<00:56,  2.63it/s, v_num=1, train_loss_step=3.48e+7, train_loss_epoch=3.75e+7]Epoch 152/300:  50%|█████     | 151/300 [00:58<00:56,  2.63it/s, v_num=1, train_loss_step=3.48e+7, train_loss_epoch=3.75e+7]Epoch 152/300:  51%|█████     | 152/300 [00:58<00:56,  2.63it/s, v_num=1, train_loss_step=3.48e+7, 
train_loss_epoch=3.75e+7]Epoch 152/300:  51%|█████     | 152/300 [00:58<00:56,  2.63it/s, v_num=1, train_loss_step=3.46e+7, train_loss_epoch=3.7e+7] Epoch 153/300:  51%|█████     | 152/300 [00:58<00:56,  2.63it/s, v_num=1, train_loss_step=3.46e+7, train_loss_epoch=3.7e+7]Epoch 153/300:  51%|█████     | 153/300 [00:58<00:55,  2.63it/s, v_num=1, train_loss_step=3.46e+7, train_loss_epoch=3.7e+7]Epoch 153/300:  51%|█████     | 153/300 [00:58<00:55,  2.63it/s, v_num=1, train_loss_step=3.46e+7, train_loss_epoch=3.65e+7]Epoch 154/300:  51%|█████     | 153/300 [00:58<00:55,  2.63it/s, v_num=1, train_loss_step=3.46e+7, train_loss_epoch=3.65e+7]Epoch 154/300:  51%|█████▏    | 154/300 [00:59<00:55,  2.64it/s, v_num=1, train_loss_step=3.46e+7, train_loss_epoch=3.65e+7]Epoch 154/300:  51%|█████▏    | 154/300 [00:59<00:55,  2.64it/s, v_num=1, train_loss_step=3.37e+7, train_loss_epoch=3.61e+7]Epoch 155/300:  51%|█████▏    | 154/300 [00:59<00:55,  2.64it/s, v_num=1, train_loss_step=3.37e+7, train_loss_epoch=3.61e+7]Epoch 155/300:  52%|█████▏    | 155/300 [00:59<00:55,  2.61it/s, v_num=1, train_loss_step=3.37e+7, train_loss_epoch=3.61e+7]Epoch 155/300:  52%|█████▏    | 155/300 [00:59<00:55,  2.61it/s, v_num=1, train_loss_step=3.26e+7, train_loss_epoch=3.56e+7]Epoch 156/300:  52%|█████▏    | 155/300 [00:59<00:55,  2.61it/s, v_num=1, train_loss_step=3.26e+7, train_loss_epoch=3.56e+7]Epoch 156/300:  52%|█████▏    | 156/300 [00:59<00:55,  2.60it/s, v_num=1, train_loss_step=3.26e+7, train_loss_epoch=3.56e+7]Epoch 156/300:  52%|█████▏    | 156/300 [00:59<00:55,  2.60it/s, v_num=1, train_loss_step=3.13e+7, train_loss_epoch=3.51e+7]Epoch 157/300:  52%|█████▏    | 156/300 [00:59<00:55,  2.60it/s, v_num=1, train_loss_step=3.13e+7, train_loss_epoch=3.51e+7]Epoch 157/300:  52%|█████▏    | 157/300 [01:00<00:55,  2.59it/s, v_num=1, train_loss_step=3.13e+7, train_loss_epoch=3.51e+7]Epoch 157/300:  52%|█████▏    | 157/300 [01:00<00:55,  2.59it/s, v_num=1, train_loss_step=3.01e+7, 
train_loss_epoch=3.47e+7]Epoch 158/300:  52%|█████▏    | 157/300 [01:00<00:55,  2.59it/s, v_num=1, train_loss_step=3.01e+7, train_loss_epoch=3.47e+7]Epoch 158/300:  53%|█████▎    | 158/300 [01:00<00:54,  2.61it/s, v_num=1, train_loss_step=3.01e+7, train_loss_epoch=3.47e+7]Epoch 158/300:  53%|█████▎    | 158/300 [01:00<00:54,  2.61it/s, v_num=1, train_loss_step=3.36e+7, train_loss_epoch=3.42e+7]Epoch 159/300:  53%|█████▎    | 158/300 [01:00<00:54,  2.61it/s, v_num=1, train_loss_step=3.36e+7, train_loss_epoch=3.42e+7]Epoch 159/300:  53%|█████▎    | 159/300 [01:01<00:54,  2.57it/s, v_num=1, train_loss_step=3.36e+7, train_loss_epoch=3.42e+7]Epoch 159/300:  53%|█████▎    | 159/300 [01:01<00:54,  2.57it/s, v_num=1, train_loss_step=3.15e+7, train_loss_epoch=3.38e+7]Epoch 160/300:  53%|█████▎    | 159/300 [01:01<00:54,  2.57it/s, v_num=1, train_loss_step=3.15e+7, train_loss_epoch=3.38e+7]Epoch 160/300:  53%|█████▎    | 160/300 [01:01<00:54,  2.59it/s, v_num=1, train_loss_step=3.15e+7, train_loss_epoch=3.38e+7]Epoch 160/300:  53%|█████▎    | 160/300 [01:01<00:54,  2.59it/s, v_num=1, train_loss_step=2.94e+7, train_loss_epoch=3.34e+7]Epoch 161/300:  53%|█████▎    | 160/300 [01:01<00:54,  2.59it/s, v_num=1, train_loss_step=2.94e+7, train_loss_epoch=3.34e+7]Epoch 161/300:  54%|█████▎    | 161/300 [01:01<00:53,  2.60it/s, v_num=1, train_loss_step=2.94e+7, train_loss_epoch=3.34e+7]Epoch 161/300:  54%|█████▎    | 161/300 [01:01<00:53,  2.60it/s, v_num=1, train_loss_step=3.19e+7, train_loss_epoch=3.3e+7] Epoch 162/300:  54%|█████▎    | 161/300 [01:01<00:53,  2.60it/s, v_num=1, train_loss_step=3.19e+7, train_loss_epoch=3.3e+7]Epoch 162/300:  54%|█████▍    | 162/300 [01:02<00:52,  2.61it/s, v_num=1, train_loss_step=3.19e+7, train_loss_epoch=3.3e+7]Epoch 162/300:  54%|█████▍    | 162/300 [01:02<00:52,  2.61it/s, v_num=1, train_loss_step=3.08e+7, train_loss_epoch=3.26e+7]Epoch 163/300:  54%|█████▍    | 162/300 [01:02<00:52,  2.61it/s, v_num=1, train_loss_step=3.08e+7, 
train_loss_epoch=3.26e+7]Epoch 163/300:  54%|█████▍    | 163/300 [01:02<00:52,  2.62it/s, v_num=1, train_loss_step=3.08e+7, train_loss_epoch=3.26e+7]Epoch 163/300:  54%|█████▍    | 163/300 [01:02<00:52,  2.62it/s, v_num=1, train_loss_step=3e+7, train_loss_epoch=3.22e+7]   Epoch 164/300:  54%|█████▍    | 163/300 [01:02<00:52,  2.62it/s, v_num=1, train_loss_step=3e+7, train_loss_epoch=3.22e+7]Epoch 164/300:  55%|█████▍    | 164/300 [01:03<00:51,  2.62it/s, v_num=1, train_loss_step=3e+7, train_loss_epoch=3.22e+7]Epoch 164/300:  55%|█████▍    | 164/300 [01:03<00:51,  2.62it/s, v_num=1, train_loss_step=3e+7, train_loss_epoch=3.18e+7]Epoch 165/300:  55%|█████▍    | 164/300 [01:03<00:51,  2.62it/s, v_num=1, train_loss_step=3e+7, train_loss_epoch=3.18e+7]Epoch 165/300:  55%|█████▌    | 165/300 [01:03<00:51,  2.64it/s, v_num=1, train_loss_step=3e+7, train_loss_epoch=3.18e+7]Epoch 165/300:  55%|█████▌    | 165/300 [01:03<00:51,  2.64it/s, v_num=1, train_loss_step=3.09e+7, train_loss_epoch=3.14e+7]Epoch 166/300:  55%|█████▌    | 165/300 [01:03<00:51,  2.64it/s, v_num=1, train_loss_step=3.09e+7, train_loss_epoch=3.14e+7]Epoch 166/300:  55%|█████▌    | 166/300 [01:03<00:50,  2.64it/s, v_num=1, train_loss_step=3.09e+7, train_loss_epoch=3.14e+7]Epoch 166/300:  55%|█████▌    | 166/300 [01:03<00:50,  2.64it/s, v_num=1, train_loss_step=2.92e+7, train_loss_epoch=3.11e+7]Epoch 167/300:  55%|█████▌    | 166/300 [01:03<00:50,  2.64it/s, v_num=1, train_loss_step=2.92e+7, train_loss_epoch=3.11e+7]Epoch 167/300:  56%|█████▌    | 167/300 [01:04<00:50,  2.64it/s, v_num=1, train_loss_step=2.92e+7, train_loss_epoch=3.11e+7]Epoch 167/300:  56%|█████▌    | 167/300 [01:04<00:50,  2.64it/s, v_num=1, train_loss_step=2.93e+7, train_loss_epoch=3.07e+7]Epoch 168/300:  56%|█████▌    | 167/300 [01:04<00:50,  2.64it/s, v_num=1, train_loss_step=2.93e+7, train_loss_epoch=3.07e+7]Epoch 168/300:  56%|█████▌    | 168/300 [01:04<00:50,  2.63it/s, v_num=1, train_loss_step=2.93e+7, train_loss_epoch=3.07e+7]Epoch 
168/300:  56%|█████▌    | 168/300 [01:04<00:50,  2.63it/s, v_num=1, train_loss_step=3.03e+7, train_loss_epoch=3.04e+7]Epoch 169/300:  56%|█████▌    | 168/300 [01:04<00:50,  2.63it/s, v_num=1, train_loss_step=3.03e+7, train_loss_epoch=3.04e+7]Epoch 169/300:  56%|█████▋    | 169/300 [01:04<00:50,  2.61it/s, v_num=1, train_loss_step=3.03e+7, train_loss_epoch=3.04e+7]Epoch 169/300:  56%|█████▋    | 169/300 [01:04<00:50,  2.61it/s, v_num=1, train_loss_step=2.7e+7, train_loss_epoch=3e+7]    Epoch 170/300:  56%|█████▋    | 169/300 [01:04<00:50,  2.61it/s, v_num=1, train_loss_step=2.7e+7, train_loss_epoch=3e+7]Epoch 170/300:  57%|█████▋    | 170/300 [01:05<00:50,  2.56it/s, v_num=1, train_loss_step=2.7e+7, train_loss_epoch=3e+7]Epoch 170/300:  57%|█████▋    | 170/300 [01:05<00:50,  2.56it/s, v_num=1, train_loss_step=2.72e+7, train_loss_epoch=2.97e+7]Epoch 171/300:  57%|█████▋    | 170/300 [01:05<00:50,  2.56it/s, v_num=1, train_loss_step=2.72e+7, train_loss_epoch=2.97e+7]Epoch 171/300:  57%|█████▋    | 171/300 [01:05<00:49,  2.59it/s, v_num=1, train_loss_step=2.72e+7, train_loss_epoch=2.97e+7]Epoch 171/300:  57%|█████▋    | 171/300 [01:05<00:49,  2.59it/s, v_num=1, train_loss_step=3.05e+7, train_loss_epoch=2.94e+7]Epoch 172/300:  57%|█████▋    | 171/300 [01:05<00:49,  2.59it/s, v_num=1, train_loss_step=3.05e+7, train_loss_epoch=2.94e+7]Epoch 172/300:  57%|█████▋    | 172/300 [01:06<00:49,  2.60it/s, v_num=1, train_loss_step=3.05e+7, train_loss_epoch=2.94e+7]Epoch 172/300:  57%|█████▋    | 172/300 [01:06<00:49,  2.60it/s, v_num=1, train_loss_step=2.71e+7, train_loss_epoch=2.9e+7] Epoch 173/300:  57%|█████▋    | 172/300 [01:06<00:49,  2.60it/s, v_num=1, train_loss_step=2.71e+7, train_loss_epoch=2.9e+7]Epoch 173/300:  58%|█████▊    | 173/300 [01:06<00:48,  2.61it/s, v_num=1, train_loss_step=2.71e+7, train_loss_epoch=2.9e+7]Epoch 173/300:  58%|█████▊    | 173/300 [01:06<00:48,  2.61it/s, v_num=1, train_loss_step=2.65e+7, train_loss_epoch=2.87e+7]Epoch 174/300:  58%|█████▊    | 
173/300 [01:06<00:48,  2.61it/s, v_num=1, train_loss_step=2.65e+7, train_loss_epoch=2.87e+7]Epoch 174/300:  58%|█████▊    | 174/300 [01:06<00:49,  2.56it/s, v_num=1, train_loss_step=2.65e+7, train_loss_epoch=2.87e+7]Epoch 174/300:  58%|█████▊    | 174/300 [01:06<00:49,  2.56it/s, v_num=1, train_loss_step=2.67e+7, train_loss_epoch=2.84e+7]Epoch 175/300:  58%|█████▊    | 174/300 [01:06<00:49,  2.56it/s, v_num=1, train_loss_step=2.67e+7, train_loss_epoch=2.84e+7]Epoch 175/300:  58%|█████▊    | 175/300 [01:07<00:48,  2.57it/s, v_num=1, train_loss_step=2.67e+7, train_loss_epoch=2.84e+7]Epoch 175/300:  58%|█████▊    | 175/300 [01:07<00:48,  2.57it/s, v_num=1, train_loss_step=2.77e+7, train_loss_epoch=2.81e+7]Epoch 176/300:  58%|█████▊    | 175/300 [01:07<00:48,  2.57it/s, v_num=1, train_loss_step=2.77e+7, train_loss_epoch=2.81e+7]Epoch 176/300:  59%|█████▊    | 176/300 [01:07<00:47,  2.59it/s, v_num=1, train_loss_step=2.77e+7, train_loss_epoch=2.81e+7]Epoch 176/300:  59%|█████▊    | 176/300 [01:07<00:47,  2.59it/s, v_num=1, train_loss_step=2.57e+7, train_loss_epoch=2.78e+7]Epoch 177/300:  59%|█████▊    | 176/300 [01:07<00:47,  2.59it/s, v_num=1, train_loss_step=2.57e+7, train_loss_epoch=2.78e+7]Epoch 177/300:  59%|█████▉    | 177/300 [01:08<00:47,  2.61it/s, v_num=1, train_loss_step=2.57e+7, train_loss_epoch=2.78e+7]Epoch 177/300:  59%|█████▉    | 177/300 [01:08<00:47,  2.61it/s, v_num=1, train_loss_step=2.59e+7, train_loss_epoch=2.75e+7]Epoch 178/300:  59%|█████▉    | 177/300 [01:08<00:47,  2.61it/s, v_num=1, train_loss_step=2.59e+7, train_loss_epoch=2.75e+7]Epoch 178/300:  59%|█████▉    | 178/300 [01:08<00:46,  2.62it/s, v_num=1, train_loss_step=2.59e+7, train_loss_epoch=2.75e+7]Epoch 178/300:  59%|█████▉    | 178/300 [01:08<00:46,  2.62it/s, v_num=1, train_loss_step=2.48e+7, train_loss_epoch=2.72e+7]Epoch 179/300:  59%|█████▉    | 178/300 [01:08<00:46,  2.62it/s, v_num=1, train_loss_step=2.48e+7, train_loss_epoch=2.72e+7]Epoch 179/300:  60%|█████▉    | 179/300 
[01:08<00:46,  2.62it/s, v_num=1, train_loss_step=2.48e+7, train_loss_epoch=2.72e+7]Epoch 179/300:  60%|█████▉    | 179/300 [01:08<00:46,  2.62it/s, v_num=1, train_loss_step=2.74e+7, train_loss_epoch=2.69e+7]Epoch 180/300:  60%|█████▉    | 179/300 [01:08<00:46,  2.62it/s, v_num=1, train_loss_step=2.74e+7, train_loss_epoch=2.69e+7]Epoch 180/300:  60%|██████    | 180/300 [01:09<00:45,  2.62it/s, v_num=1, train_loss_step=2.74e+7, train_loss_epoch=2.69e+7]Epoch 180/300:  60%|██████    | 180/300 [01:09<00:45,  2.62it/s, v_num=1, train_loss_step=2.35e+7, train_loss_epoch=2.67e+7]Epoch 181/300:  60%|██████    | 180/300 [01:09<00:45,  2.62it/s, v_num=1, train_loss_step=2.35e+7, train_loss_epoch=2.67e+7]Epoch 181/300:  60%|██████    | 181/300 [01:09<00:45,  2.60it/s, v_num=1, train_loss_step=2.35e+7, train_loss_epoch=2.67e+7]Epoch 181/300:  60%|██████    | 181/300 [01:09<00:45,  2.60it/s, v_num=1, train_loss_step=2.45e+7, train_loss_epoch=2.64e+7]Epoch 182/300:  60%|██████    | 181/300 [01:09<00:45,  2.60it/s, v_num=1, train_loss_step=2.45e+7, train_loss_epoch=2.64e+7]Epoch 182/300:  61%|██████    | 182/300 [01:09<00:45,  2.61it/s, v_num=1, train_loss_step=2.45e+7, train_loss_epoch=2.64e+7]Epoch 182/300:  61%|██████    | 182/300 [01:09<00:45,  2.61it/s, v_num=1, train_loss_step=2.64e+7, train_loss_epoch=2.61e+7]Epoch 183/300:  61%|██████    | 182/300 [01:09<00:45,  2.61it/s, v_num=1, train_loss_step=2.64e+7, train_loss_epoch=2.61e+7]Epoch 183/300:  61%|██████    | 183/300 [01:10<00:45,  2.57it/s, v_num=1, train_loss_step=2.64e+7, train_loss_epoch=2.61e+7]Epoch 183/300:  61%|██████    | 183/300 [01:10<00:45,  2.57it/s, v_num=1, train_loss_step=2.35e+7, train_loss_epoch=2.59e+7]Epoch 184/300:  61%|██████    | 183/300 [01:10<00:45,  2.57it/s, v_num=1, train_loss_step=2.35e+7, train_loss_epoch=2.59e+7]Epoch 184/300:  61%|██████▏   | 184/300 [01:10<00:45,  2.57it/s, v_num=1, train_loss_step=2.35e+7, train_loss_epoch=2.59e+7]Epoch 184/300:  61%|██████▏   | 184/300 [01:10<00:45,  
2.57it/s, v_num=1, train_loss_step=2.56e+7, train_loss_epoch=2.56e+7]Epoch 185/300:  61%|██████▏   | 184/300 [01:10<00:45,  2.57it/s, v_num=1, train_loss_step=2.56e+7, train_loss_epoch=2.56e+7]Epoch 185/300:  62%|██████▏   | 185/300 [01:11<00:44,  2.58it/s, v_num=1, train_loss_step=2.56e+7, train_loss_epoch=2.56e+7]Epoch 185/300:  62%|██████▏   | 185/300 [01:11<00:44,  2.58it/s, v_num=1, train_loss_step=2.43e+7, train_loss_epoch=2.54e+7]Epoch 186/300:  62%|██████▏   | 185/300 [01:11<00:44,  2.58it/s, v_num=1, train_loss_step=2.43e+7, train_loss_epoch=2.54e+7]Epoch 186/300:  62%|██████▏   | 186/300 [01:11<00:44,  2.59it/s, v_num=1, train_loss_step=2.43e+7, train_loss_epoch=2.54e+7]Epoch 186/300:  62%|██████▏   | 186/300 [01:11<00:44,  2.59it/s, v_num=1, train_loss_step=2.5e+7, train_loss_epoch=2.51e+7] Epoch 187/300:  62%|██████▏   | 186/300 [01:11<00:44,  2.59it/s, v_num=1, train_loss_step=2.5e+7, train_loss_epoch=2.51e+7]Epoch 187/300:  62%|██████▏   | 187/300 [01:11<00:43,  2.60it/s, v_num=1, train_loss_step=2.5e+7, train_loss_epoch=2.51e+7]Epoch 187/300:  62%|██████▏   | 187/300 [01:11<00:43,  2.60it/s, v_num=1, train_loss_step=2.47e+7, train_loss_epoch=2.49e+7]Epoch 188/300:  62%|██████▏   | 187/300 [01:11<00:43,  2.60it/s, v_num=1, train_loss_step=2.47e+7, train_loss_epoch=2.49e+7]Epoch 188/300:  63%|██████▎   | 188/300 [01:12<00:42,  2.61it/s, v_num=1, train_loss_step=2.47e+7, train_loss_epoch=2.49e+7]Epoch 188/300:  63%|██████▎   | 188/300 [01:12<00:42,  2.61it/s, v_num=1, train_loss_step=2.4e+7, train_loss_epoch=2.46e+7] Epoch 189/300:  63%|██████▎   | 188/300 [01:12<00:42,  2.61it/s, v_num=1, train_loss_step=2.4e+7, train_loss_epoch=2.46e+7]Epoch 189/300:  63%|██████▎   | 189/300 [01:12<00:42,  2.61it/s, v_num=1, train_loss_step=2.4e+7, train_loss_epoch=2.46e+7]Epoch 189/300:  63%|██████▎   | 189/300 [01:12<00:42,  2.61it/s, v_num=1, train_loss_step=2.25e+7, train_loss_epoch=2.44e+7]Epoch 190/300:  63%|██████▎   | 189/300 [01:12<00:42,  2.61it/s, v_num=1, 
train_loss_step=2.25e+7, train_loss_epoch=2.44e+7]Epoch 190/300:  63%|██████▎   | 190/300 [01:13<00:41,  2.64it/s, v_num=1, train_loss_step=2.25e+7, train_loss_epoch=2.44e+7]Epoch 190/300:  63%|██████▎   | 190/300 [01:13<00:41,  2.64it/s, v_num=1, train_loss_step=2.36e+7, train_loss_epoch=2.42e+7]Epoch 191/300:  63%|██████▎   | 190/300 [01:13<00:41,  2.64it/s, v_num=1, train_loss_step=2.36e+7, train_loss_epoch=2.42e+7]Epoch 191/300:  64%|██████▎   | 191/300 [01:13<00:41,  2.62it/s, v_num=1, train_loss_step=2.36e+7, train_loss_epoch=2.42e+7]Epoch 191/300:  64%|██████▎   | 191/300 [01:13<00:41,  2.62it/s, v_num=1, train_loss_step=2.39e+7, train_loss_epoch=2.4e+7] Epoch 192/300:  64%|██████▎   | 191/300 [01:13<00:41,  2.62it/s, v_num=1, train_loss_step=2.39e+7, train_loss_epoch=2.4e+7]Epoch 192/300:  64%|██████▍   | 192/300 [01:13<00:41,  2.62it/s, v_num=1, train_loss_step=2.39e+7, train_loss_epoch=2.4e+7]Epoch 192/300:  64%|██████▍   | 192/300 [01:13<00:41,  2.62it/s, v_num=1, train_loss_step=2.37e+7, train_loss_epoch=2.38e+7]Epoch 193/300:  64%|██████▍   | 192/300 [01:13<00:41,  2.62it/s, v_num=1, train_loss_step=2.37e+7, train_loss_epoch=2.38e+7]Epoch 193/300:  64%|██████▍   | 193/300 [01:14<00:40,  2.62it/s, v_num=1, train_loss_step=2.37e+7, train_loss_epoch=2.38e+7]Epoch 193/300:  64%|██████▍   | 193/300 [01:14<00:40,  2.62it/s, v_num=1, train_loss_step=2.2e+7, train_loss_epoch=2.35e+7] Epoch 194/300:  64%|██████▍   | 193/300 [01:14<00:40,  2.62it/s, v_num=1, train_loss_step=2.2e+7, train_loss_epoch=2.35e+7]Epoch 194/300:  65%|██████▍   | 194/300 [01:14<00:40,  2.62it/s, v_num=1, train_loss_step=2.2e+7, train_loss_epoch=2.35e+7]Epoch 194/300:  65%|██████▍   | 194/300 [01:14<00:40,  2.62it/s, v_num=1, train_loss_step=2.15e+7, train_loss_epoch=2.33e+7]Epoch 195/300:  65%|██████▍   | 194/300 [01:14<00:40,  2.62it/s, v_num=1, train_loss_step=2.15e+7, train_loss_epoch=2.33e+7]Epoch 195/300:  65%|██████▌   | 195/300 [01:14<00:39,  2.63it/s, v_num=1, 
train_loss_step=2.15e+7, train_loss_epoch=2.33e+7]Epoch 195/300:  65%|██████▌   | 195/300 [01:14<00:39,  2.63it/s, v_num=1, train_loss_step=2.23e+7, train_loss_epoch=2.31e+7]Epoch 196/300:  65%|██████▌   | 195/300 [01:14<00:39,  2.63it/s, v_num=1, train_loss_step=2.23e+7, train_loss_epoch=2.31e+7]Epoch 196/300:  65%|██████▌   | 196/300 [01:15<00:39,  2.62it/s, v_num=1, train_loss_step=2.23e+7, train_loss_epoch=2.31e+7]Epoch 196/300:  65%|██████▌   | 196/300 [01:15<00:39,  2.62it/s, v_num=1, train_loss_step=2.18e+7, train_loss_epoch=2.29e+7]Epoch 197/300:  65%|██████▌   | 196/300 [01:15<00:39,  2.62it/s, v_num=1, train_loss_step=2.18e+7, train_loss_epoch=2.29e+7]Epoch 197/300:  66%|██████▌   | 197/300 [01:15<00:39,  2.59it/s, v_num=1, train_loss_step=2.18e+7, train_loss_epoch=2.29e+7]Epoch 197/300:  66%|██████▌   | 197/300 [01:15<00:39,  2.59it/s, v_num=1, train_loss_step=2.07e+7, train_loss_epoch=2.27e+7]Epoch 198/300:  66%|██████▌   | 197/300 [01:15<00:39,  2.59it/s, v_num=1, train_loss_step=2.07e+7, train_loss_epoch=2.27e+7]Epoch 198/300:  66%|██████▌   | 198/300 [01:16<00:39,  2.58it/s, v_num=1, train_loss_step=2.07e+7, train_loss_epoch=2.27e+7]Epoch 198/300:  66%|██████▌   | 198/300 [01:16<00:39,  2.58it/s, v_num=1, train_loss_step=2.17e+7, train_loss_epoch=2.25e+7]Epoch 199/300:  66%|██████▌   | 198/300 [01:16<00:39,  2.58it/s, v_num=1, train_loss_step=2.17e+7, train_loss_epoch=2.25e+7]Epoch 199/300:  66%|██████▋   | 199/300 [01:16<00:39,  2.58it/s, v_num=1, train_loss_step=2.17e+7, train_loss_epoch=2.25e+7]Epoch 199/300:  66%|██████▋   | 199/300 [01:16<00:39,  2.58it/s, v_num=1, train_loss_step=2.27e+7, train_loss_epoch=2.23e+7]Epoch 200/300:  66%|██████▋   | 199/300 [01:16<00:39,  2.58it/s, v_num=1, train_loss_step=2.27e+7, train_loss_epoch=2.23e+7]Epoch 200/300:  67%|██████▋   | 200/300 [01:16<00:38,  2.60it/s, v_num=1, train_loss_step=2.27e+7, train_loss_epoch=2.23e+7]Epoch 200/300:  67%|██████▋   | 200/300 [01:16<00:38,  2.60it/s, v_num=1, 
train_loss_step=2.12e+7, train_loss_epoch=2.22e+7]Epoch 201/300:  67%|██████▋   | 200/300 [01:16<00:38,  2.60it/s, v_num=1, train_loss_step=2.12e+7, train_loss_epoch=2.22e+7]Epoch 201/300:  67%|██████▋   | 201/300 [01:17<00:38,  2.60it/s, v_num=1, train_loss_step=2.12e+7, train_loss_epoch=2.22e+7]Epoch 201/300:  67%|██████▋   | 201/300 [01:17<00:38,  2.60it/s, v_num=1, train_loss_step=2.07e+7, train_loss_epoch=2.2e+7] Epoch 202/300:  67%|██████▋   | 201/300 [01:17<00:38,  2.60it/s, v_num=1, train_loss_step=2.07e+7, train_loss_epoch=2.2e+7]Epoch 202/300:  67%|██████▋   | 202/300 [01:17<00:37,  2.62it/s, v_num=1, train_loss_step=2.07e+7, train_loss_epoch=2.2e+7]Epoch 202/300:  67%|██████▋   | 202/300 [01:17<00:37,  2.62it/s, v_num=1, train_loss_step=1.87e+7, train_loss_epoch=2.18e+7]Epoch 203/300:  67%|██████▋   | 202/300 [01:17<00:37,  2.62it/s, v_num=1, train_loss_step=1.87e+7, train_loss_epoch=2.18e+7]Epoch 203/300:  68%|██████▊   | 203/300 [01:18<00:36,  2.63it/s, v_num=1, train_loss_step=1.87e+7, train_loss_epoch=2.18e+7]Epoch 203/300:  68%|██████▊   | 203/300 [01:18<00:36,  2.63it/s, v_num=1, train_loss_step=1.97e+7, train_loss_epoch=2.16e+7]Epoch 204/300:  68%|██████▊   | 203/300 [01:18<00:36,  2.63it/s, v_num=1, train_loss_step=1.97e+7, train_loss_epoch=2.16e+7]Epoch 204/300:  68%|██████▊   | 204/300 [01:18<00:36,  2.63it/s, v_num=1, train_loss_step=1.97e+7, train_loss_epoch=2.16e+7]Epoch 204/300:  68%|██████▊   | 204/300 [01:18<00:36,  2.63it/s, v_num=1, train_loss_step=2.25e+7, train_loss_epoch=2.14e+7]Epoch 205/300:  68%|██████▊   | 204/300 [01:18<00:36,  2.63it/s, v_num=1, train_loss_step=2.25e+7, train_loss_epoch=2.14e+7]Epoch 205/300:  68%|██████▊   | 205/300 [01:18<00:36,  2.63it/s, v_num=1, train_loss_step=2.25e+7, train_loss_epoch=2.14e+7]Epoch 205/300:  68%|██████▊   | 205/300 [01:18<00:36,  2.63it/s, v_num=1, train_loss_step=2.09e+7, train_loss_epoch=2.13e+7]Epoch 206/300:  68%|██████▊   | 205/300 [01:18<00:36,  2.63it/s, v_num=1, 
train_loss_step=2.09e+7, train_loss_epoch=2.13e+7]Epoch 206/300:  69%|██████▊   | 206/300 [01:19<00:35,  2.62it/s, v_num=1, train_loss_step=2.09e+7, train_loss_epoch=2.13e+7]Epoch 206/300:  69%|██████▊   | 206/300 [01:19<00:35,  2.62it/s, v_num=1, train_loss_step=2.09e+7, train_loss_epoch=2.11e+7]Epoch 207/300:  69%|██████▊   | 206/300 [01:19<00:35,  2.62it/s, v_num=1, train_loss_step=2.09e+7, train_loss_epoch=2.11e+7]Epoch 207/300:  69%|██████▉   | 207/300 [01:19<00:35,  2.61it/s, v_num=1, train_loss_step=2.09e+7, train_loss_epoch=2.11e+7]Epoch 207/300:  69%|██████▉   | 207/300 [01:19<00:35,  2.61it/s, v_num=1, train_loss_step=1.94e+7, train_loss_epoch=2.09e+7]Epoch 208/300:  69%|██████▉   | 207/300 [01:19<00:35,  2.61it/s, v_num=1, train_loss_step=1.94e+7, train_loss_epoch=2.09e+7]Epoch 208/300:  69%|██████▉   | 208/300 [01:19<00:35,  2.63it/s, v_num=1, train_loss_step=1.94e+7, train_loss_epoch=2.09e+7]Epoch 208/300:  69%|██████▉   | 208/300 [01:19<00:35,  2.63it/s, v_num=1, train_loss_step=2.03e+7, train_loss_epoch=2.08e+7]Epoch 209/300:  69%|██████▉   | 208/300 [01:19<00:35,  2.63it/s, v_num=1, train_loss_step=2.03e+7, train_loss_epoch=2.08e+7]Epoch 209/300:  70%|██████▉   | 209/300 [01:20<00:34,  2.61it/s, v_num=1, train_loss_step=2.03e+7, train_loss_epoch=2.08e+7]Epoch 209/300:  70%|██████▉   | 209/300 [01:20<00:34,  2.61it/s, v_num=1, train_loss_step=1.97e+7, train_loss_epoch=2.06e+7]Epoch 210/300:  70%|██████▉   | 209/300 [01:20<00:34,  2.61it/s, v_num=1, train_loss_step=1.97e+7, train_loss_epoch=2.06e+7]Epoch 210/300:  70%|███████   | 210/300 [01:20<00:34,  2.57it/s, v_num=1, train_loss_step=1.97e+7, train_loss_epoch=2.06e+7]Epoch 210/300:  70%|███████   | 210/300 [01:20<00:34,  2.57it/s, v_num=1, train_loss_step=1.76e+7, train_loss_epoch=2.05e+7]Epoch 211/300:  70%|███████   | 210/300 [01:20<00:34,  2.57it/s, v_num=1, train_loss_step=1.76e+7, train_loss_epoch=2.05e+7]Epoch 211/300:  70%|███████   | 211/300 [01:21<00:35,  2.54it/s, v_num=1, 
train_loss_step=1.76e+7, train_loss_epoch=2.05e+7]Epoch 211/300:  70%|███████   | 211/300 [01:21<00:35,  2.54it/s, v_num=1, train_loss_step=1.78e+7, train_loss_epoch=2.03e+7]Epoch 212/300:  70%|███████   | 211/300 [01:21<00:35,  2.54it/s, v_num=1, train_loss_step=1.78e+7, train_loss_epoch=2.03e+7]Epoch 212/300:  71%|███████   | 212/300 [01:21<00:34,  2.56it/s, v_num=1, train_loss_step=1.78e+7, train_loss_epoch=2.03e+7]Epoch 212/300:  71%|███████   | 212/300 [01:21<00:34,  2.56it/s, v_num=1, train_loss_step=2e+7, train_loss_epoch=2.02e+7]   Epoch 213/300:  71%|███████   | 212/300 [01:21<00:34,  2.56it/s, v_num=1, train_loss_step=2e+7, train_loss_epoch=2.02e+7]Epoch 213/300:  71%|███████   | 213/300 [01:21<00:33,  2.58it/s, v_num=1, train_loss_step=2e+7, train_loss_epoch=2.02e+7]Epoch 213/300:  71%|███████   | 213/300 [01:21<00:33,  2.58it/s, v_num=1, train_loss_step=1.73e+7, train_loss_epoch=2e+7]Epoch 214/300:  71%|███████   | 213/300 [01:21<00:33,  2.58it/s, v_num=1, train_loss_step=1.73e+7, train_loss_epoch=2e+7]Epoch 214/300:  71%|███████▏  | 214/300 [01:22<00:33,  2.59it/s, v_num=1, train_loss_step=1.73e+7, train_loss_epoch=2e+7]Epoch 214/300:  71%|███████▏  | 214/300 [01:22<00:33,  2.59it/s, v_num=1, train_loss_step=2.1e+7, train_loss_epoch=1.99e+7]Epoch 215/300:  71%|███████▏  | 214/300 [01:22<00:33,  2.59it/s, v_num=1, train_loss_step=2.1e+7, train_loss_epoch=1.99e+7]Epoch 215/300:  72%|███████▏  | 215/300 [01:22<00:32,  2.60it/s, v_num=1, train_loss_step=2.1e+7, train_loss_epoch=1.99e+7]Epoch 215/300:  72%|███████▏  | 215/300 [01:22<00:32,  2.60it/s, v_num=1, train_loss_step=1.68e+7, train_loss_epoch=1.97e+7]Epoch 216/300:  72%|███████▏  | 215/300 [01:22<00:32,  2.60it/s, v_num=1, train_loss_step=1.68e+7, train_loss_epoch=1.97e+7]Epoch 216/300:  72%|███████▏  | 216/300 [01:23<00:31,  2.63it/s, v_num=1, train_loss_step=1.68e+7, train_loss_epoch=1.97e+7]Epoch 216/300:  72%|███████▏  | 216/300 [01:23<00:31,  2.63it/s, v_num=1, train_loss_step=1.8e+7, 
train_loss_epoch=1.96e+7] Epoch 217/300:  72%|███████▏  | 216/300 [01:23<00:31,  2.63it/s, v_num=1, train_loss_step=1.8e+7, train_loss_epoch=1.96e+7]Epoch 217/300:  72%|███████▏  | 217/300 [01:23<00:31,  2.62it/s, v_num=1, train_loss_step=1.8e+7, train_loss_epoch=1.96e+7]Epoch 217/300:  72%|███████▏  | 217/300 [01:23<00:31,  2.62it/s, v_num=1, train_loss_step=1.75e+7, train_loss_epoch=1.94e+7]Epoch 218/300:  72%|███████▏  | 217/300 [01:23<00:31,  2.62it/s, v_num=1, train_loss_step=1.75e+7, train_loss_epoch=1.94e+7]Epoch 218/300:  73%|███████▎  | 218/300 [01:23<00:31,  2.62it/s, v_num=1, train_loss_step=1.75e+7, train_loss_epoch=1.94e+7]Epoch 218/300:  73%|███████▎  | 218/300 [01:23<00:31,  2.62it/s, v_num=1, train_loss_step=1.82e+7, train_loss_epoch=1.93e+7]Epoch 219/300:  73%|███████▎  | 218/300 [01:23<00:31,  2.62it/s, v_num=1, train_loss_step=1.82e+7, train_loss_epoch=1.93e+7]Epoch 219/300:  73%|███████▎  | 219/300 [01:24<00:30,  2.63it/s, v_num=1, train_loss_step=1.82e+7, train_loss_epoch=1.93e+7]Epoch 219/300:  73%|███████▎  | 219/300 [01:24<00:30,  2.63it/s, v_num=1, train_loss_step=1.73e+7, train_loss_epoch=1.92e+7]Epoch 220/300:  73%|███████▎  | 219/300 [01:24<00:30,  2.63it/s, v_num=1, train_loss_step=1.73e+7, train_loss_epoch=1.92e+7]Epoch 220/300:  73%|███████▎  | 220/300 [01:24<00:30,  2.62it/s, v_num=1, train_loss_step=1.73e+7, train_loss_epoch=1.92e+7]Epoch 220/300:  73%|███████▎  | 220/300 [01:24<00:30,  2.62it/s, v_num=1, train_loss_step=1.71e+7, train_loss_epoch=1.9e+7] Epoch 221/300:  73%|███████▎  | 220/300 [01:24<00:30,  2.62it/s, v_num=1, train_loss_step=1.71e+7, train_loss_epoch=1.9e+7]Epoch 221/300:  74%|███████▎  | 221/300 [01:24<00:30,  2.63it/s, v_num=1, train_loss_step=1.71e+7, train_loss_epoch=1.9e+7]Epoch 221/300:  74%|███████▎  | 221/300 [01:24<00:30,  2.63it/s, v_num=1, train_loss_step=1.76e+7, train_loss_epoch=1.89e+7]Epoch 222/300:  74%|███████▎  | 221/300 [01:24<00:30,  2.63it/s, v_num=1, train_loss_step=1.76e+7, 
train_loss_epoch=1.89e+7]Epoch 222/300:  74%|███████▍  | 222/300 [01:25<00:29,  2.63it/s, v_num=1, train_loss_step=1.76e+7, train_loss_epoch=1.89e+7]Epoch 222/300:  74%|███████▍  | 222/300 [01:25<00:29,  2.63it/s, v_num=1, train_loss_step=1.69e+7, train_loss_epoch=1.88e+7]Epoch 223/300:  74%|███████▍  | 222/300 [01:25<00:29,  2.63it/s, v_num=1, train_loss_step=1.69e+7, train_loss_epoch=1.88e+7]Epoch 223/300:  74%|███████▍  | 223/300 [01:25<00:29,  2.63it/s, v_num=1, train_loss_step=1.69e+7, train_loss_epoch=1.88e+7]Epoch 223/300:  74%|███████▍  | 223/300 [01:25<00:29,  2.63it/s, v_num=1, train_loss_step=1.84e+7, train_loss_epoch=1.87e+7]Epoch 224/300:  74%|███████▍  | 223/300 [01:25<00:29,  2.63it/s, v_num=1, train_loss_step=1.84e+7, train_loss_epoch=1.87e+7]Epoch 224/300:  75%|███████▍  | 224/300 [01:26<00:29,  2.59it/s, v_num=1, train_loss_step=1.84e+7, train_loss_epoch=1.87e+7]Epoch 224/300:  75%|███████▍  | 224/300 [01:26<00:29,  2.59it/s, v_num=1, train_loss_step=1.7e+7, train_loss_epoch=1.85e+7] Epoch 225/300:  75%|███████▍  | 224/300 [01:26<00:29,  2.59it/s, v_num=1, train_loss_step=1.7e+7, train_loss_epoch=1.85e+7]Epoch 225/300:  75%|███████▌  | 225/300 [01:26<00:29,  2.56it/s, v_num=1, train_loss_step=1.7e+7, train_loss_epoch=1.85e+7]Epoch 225/300:  75%|███████▌  | 225/300 [01:26<00:29,  2.56it/s, v_num=1, train_loss_step=1.79e+7, train_loss_epoch=1.84e+7]Epoch 226/300:  75%|███████▌  | 225/300 [01:26<00:29,  2.56it/s, v_num=1, train_loss_step=1.79e+7, train_loss_epoch=1.84e+7]Epoch 226/300:  75%|███████▌  | 226/300 [01:26<00:28,  2.58it/s, v_num=1, train_loss_step=1.79e+7, train_loss_epoch=1.84e+7]Epoch 226/300:  75%|███████▌  | 226/300 [01:26<00:28,  2.58it/s, v_num=1, train_loss_step=1.81e+7, train_loss_epoch=1.83e+7]Epoch 227/300:  75%|███████▌  | 226/300 [01:26<00:28,  2.58it/s, v_num=1, train_loss_step=1.81e+7, train_loss_epoch=1.83e+7]Epoch 227/300:  76%|███████▌  | 227/300 [01:27<00:28,  2.59it/s, v_num=1, train_loss_step=1.81e+7, 
train_loss_epoch=1.83e+7]Epoch 227/300:  76%|███████▌  | 227/300 [01:27<00:28,  2.59it/s, v_num=1, train_loss_step=1.83e+7, train_loss_epoch=1.82e+7]Epoch 228/300:  76%|███████▌  | 227/300 [01:27<00:28,  2.59it/s, v_num=1, train_loss_step=1.83e+7, train_loss_epoch=1.82e+7]Epoch 228/300:  76%|███████▌  | 228/300 [01:27<00:27,  2.59it/s, v_num=1, train_loss_step=1.83e+7, train_loss_epoch=1.82e+7]Epoch 228/300:  76%|███████▌  | 228/300 [01:27<00:27,  2.59it/s, v_num=1, train_loss_step=1.67e+7, train_loss_epoch=1.81e+7]Epoch 229/300:  76%|███████▌  | 228/300 [01:27<00:27,  2.59it/s, v_num=1, train_loss_step=1.67e+7, train_loss_epoch=1.81e+7]Epoch 229/300:  76%|███████▋  | 229/300 [01:28<00:27,  2.61it/s, v_num=1, train_loss_step=1.67e+7, train_loss_epoch=1.81e+7]Epoch 229/300:  76%|███████▋  | 229/300 [01:28<00:27,  2.61it/s, v_num=1, train_loss_step=1.66e+7, train_loss_epoch=1.8e+7] Epoch 230/300:  76%|███████▋  | 229/300 [01:28<00:27,  2.61it/s, v_num=1, train_loss_step=1.66e+7, train_loss_epoch=1.8e+7]Epoch 230/300:  77%|███████▋  | 230/300 [01:28<00:26,  2.62it/s, v_num=1, train_loss_step=1.66e+7, train_loss_epoch=1.8e+7]Epoch 230/300:  77%|███████▋  | 230/300 [01:28<00:26,  2.62it/s, v_num=1, train_loss_step=1.96e+7, train_loss_epoch=1.79e+7]Epoch 231/300:  77%|███████▋  | 230/300 [01:28<00:26,  2.62it/s, v_num=1, train_loss_step=1.96e+7, train_loss_epoch=1.79e+7]Epoch 231/300:  77%|███████▋  | 231/300 [01:28<00:26,  2.61it/s, v_num=1, train_loss_step=1.96e+7, train_loss_epoch=1.79e+7]Epoch 231/300:  77%|███████▋  | 231/300 [01:28<00:26,  2.61it/s, v_num=1, train_loss_step=1.65e+7, train_loss_epoch=1.78e+7]Epoch 232/300:  77%|███████▋  | 231/300 [01:28<00:26,  2.61it/s, v_num=1, train_loss_step=1.65e+7, train_loss_epoch=1.78e+7]Epoch 232/300:  77%|███████▋  | 232/300 [01:29<00:25,  2.62it/s, v_num=1, train_loss_step=1.65e+7, train_loss_epoch=1.78e+7]Epoch 232/300:  77%|███████▋  | 232/300 [01:29<00:25,  2.62it/s, v_num=1, train_loss_step=1.76e+7, 
train_loss_epoch=1.76e+7]Epoch 233/300:  77%|███████▋  | 232/300 [01:29<00:25,  2.62it/s, v_num=1, train_loss_step=1.76e+7, train_loss_epoch=1.76e+7]Epoch 233/300:  78%|███████▊  | 233/300 [01:29<00:25,  2.60it/s, v_num=1, train_loss_step=1.76e+7, train_loss_epoch=1.76e+7]Epoch 233/300:  78%|███████▊  | 233/300 [01:29<00:25,  2.60it/s, v_num=1, train_loss_step=1.61e+7, train_loss_epoch=1.75e+7]Epoch 234/300:  78%|███████▊  | 233/300 [01:29<00:25,  2.60it/s, v_num=1, train_loss_step=1.61e+7, train_loss_epoch=1.75e+7]Epoch 234/300:  78%|███████▊  | 234/300 [01:29<00:25,  2.62it/s, v_num=1, train_loss_step=1.61e+7, train_loss_epoch=1.75e+7]Epoch 234/300:  78%|███████▊  | 234/300 [01:29<00:25,  2.62it/s, v_num=1, train_loss_step=1.79e+7, train_loss_epoch=1.74e+7]Epoch 235/300:  78%|███████▊  | 234/300 [01:29<00:25,  2.62it/s, v_num=1, train_loss_step=1.79e+7, train_loss_epoch=1.74e+7]Epoch 235/300:  78%|███████▊  | 235/300 [01:30<00:24,  2.62it/s, v_num=1, train_loss_step=1.79e+7, train_loss_epoch=1.74e+7]Epoch 235/300:  78%|███████▊  | 235/300 [01:30<00:24,  2.62it/s, v_num=1, train_loss_step=1.62e+7, train_loss_epoch=1.73e+7]Epoch 236/300:  78%|███████▊  | 235/300 [01:30<00:24,  2.62it/s, v_num=1, train_loss_step=1.62e+7, train_loss_epoch=1.73e+7]Epoch 236/300:  79%|███████▊  | 236/300 [01:30<00:24,  2.62it/s, v_num=1, train_loss_step=1.62e+7, train_loss_epoch=1.73e+7]Epoch 236/300:  79%|███████▊  | 236/300 [01:30<00:24,  2.62it/s, v_num=1, train_loss_step=1.62e+7, train_loss_epoch=1.72e+7]Epoch 237/300:  79%|███████▊  | 236/300 [01:30<00:24,  2.62it/s, v_num=1, train_loss_step=1.62e+7, train_loss_epoch=1.72e+7]Epoch 237/300:  79%|███████▉  | 237/300 [01:31<00:24,  2.57it/s, v_num=1, train_loss_step=1.62e+7, train_loss_epoch=1.72e+7]Epoch 237/300:  79%|███████▉  | 237/300 [01:31<00:24,  2.57it/s, v_num=1, train_loss_step=1.59e+7, train_loss_epoch=1.71e+7]Epoch 238/300:  79%|███████▉  | 237/300 [01:31<00:24,  2.57it/s, v_num=1, train_loss_step=1.59e+7, 
train_loss_epoch=1.71e+7]Epoch 238/300:  79%|███████▉  | 238/300 [01:31<00:24,  2.57it/s, v_num=1, train_loss_step=1.59e+7, train_loss_epoch=1.71e+7]Epoch 238/300:  79%|███████▉  | 238/300 [01:31<00:24,  2.57it/s, v_num=1, train_loss_step=1.42e+7, train_loss_epoch=1.7e+7] Epoch 239/300:  79%|███████▉  | 238/300 [01:31<00:24,  2.57it/s, v_num=1, train_loss_step=1.42e+7, train_loss_epoch=1.7e+7]Epoch 239/300:  80%|███████▉  | 239/300 [01:31<00:23,  2.56it/s, v_num=1, train_loss_step=1.42e+7, train_loss_epoch=1.7e+7]Epoch 239/300:  80%|███████▉  | 239/300 [01:31<00:23,  2.56it/s, v_num=1, train_loss_step=1.62e+7, train_loss_epoch=1.7e+7]Epoch 240/300:  80%|███████▉  | 239/300 [01:31<00:23,  2.56it/s, v_num=1, train_loss_step=1.62e+7, train_loss_epoch=1.7e+7]Epoch 240/300:  80%|████████  | 240/300 [01:32<00:23,  2.58it/s, v_num=1, train_loss_step=1.62e+7, train_loss_epoch=1.7e+7]Epoch 240/300:  80%|████████  | 240/300 [01:32<00:23,  2.58it/s, v_num=1, train_loss_step=1.39e+7, train_loss_epoch=1.69e+7]Epoch 241/300:  80%|████████  | 240/300 [01:32<00:23,  2.58it/s, v_num=1, train_loss_step=1.39e+7, train_loss_epoch=1.69e+7]Epoch 241/300:  80%|████████  | 241/300 [01:32<00:22,  2.60it/s, v_num=1, train_loss_step=1.39e+7, train_loss_epoch=1.69e+7]Epoch 241/300:  80%|████████  | 241/300 [01:32<00:22,  2.60it/s, v_num=1, train_loss_step=1.62e+7, train_loss_epoch=1.68e+7]Epoch 242/300:  80%|████████  | 241/300 [01:32<00:22,  2.60it/s, v_num=1, train_loss_step=1.62e+7, train_loss_epoch=1.68e+7]Epoch 242/300:  81%|████████  | 242/300 [01:33<00:22,  2.62it/s, v_num=1, train_loss_step=1.62e+7, train_loss_epoch=1.68e+7]Epoch 242/300:  81%|████████  | 242/300 [01:33<00:22,  2.62it/s, v_num=1, train_loss_step=1.62e+7, train_loss_epoch=1.67e+7]Epoch 243/300:  81%|████████  | 242/300 [01:33<00:22,  2.62it/s, v_num=1, train_loss_step=1.62e+7, train_loss_epoch=1.67e+7]Epoch 243/300:  81%|████████  | 243/300 [01:33<00:21,  2.62it/s, v_num=1, train_loss_step=1.62e+7, 
train_loss_epoch=1.67e+7]Epoch 243/300:  81%|████████  | 243/300 [01:33<00:21,  2.62it/s, v_num=1, train_loss_step=1.54e+7, train_loss_epoch=1.66e+7]Epoch 244/300:  81%|████████  | 243/300 [01:33<00:21,  2.62it/s, v_num=1, train_loss_step=1.54e+7, train_loss_epoch=1.66e+7]Epoch 244/300:  81%|████████▏ | 244/300 [01:33<00:21,  2.62it/s, v_num=1, train_loss_step=1.54e+7, train_loss_epoch=1.66e+7]Epoch 244/300:  81%|████████▏ | 244/300 [01:33<00:21,  2.62it/s, v_num=1, train_loss_step=1.61e+7, train_loss_epoch=1.65e+7]Epoch 245/300:  81%|████████▏ | 244/300 [01:33<00:21,  2.62it/s, v_num=1, train_loss_step=1.61e+7, train_loss_epoch=1.65e+7]Epoch 245/300:  82%|████████▏ | 245/300 [01:34<00:20,  2.64it/s, v_num=1, train_loss_step=1.61e+7, train_loss_epoch=1.65e+7]Epoch 245/300:  82%|████████▏ | 245/300 [01:34<00:20,  2.64it/s, v_num=1, train_loss_step=1.53e+7, train_loss_epoch=1.64e+7]Epoch 246/300:  82%|████████▏ | 245/300 [01:34<00:20,  2.64it/s, v_num=1, train_loss_step=1.53e+7, train_loss_epoch=1.64e+7]Epoch 246/300:  82%|████████▏ | 246/300 [01:34<00:20,  2.64it/s, v_num=1, train_loss_step=1.53e+7, train_loss_epoch=1.64e+7]Epoch 246/300:  82%|████████▏ | 246/300 [01:34<00:20,  2.64it/s, v_num=1, train_loss_step=1.56e+7, train_loss_epoch=1.63e+7]Epoch 247/300:  82%|████████▏ | 246/300 [01:34<00:20,  2.64it/s, v_num=1, train_loss_step=1.56e+7, train_loss_epoch=1.63e+7]Epoch 247/300:  82%|████████▏ | 247/300 [01:34<00:19,  2.65it/s, v_num=1, train_loss_step=1.56e+7, train_loss_epoch=1.63e+7]Epoch 247/300:  82%|████████▏ | 247/300 [01:34<00:19,  2.65it/s, v_num=1, train_loss_step=1.62e+7, train_loss_epoch=1.62e+7]Epoch 248/300:  82%|████████▏ | 247/300 [01:34<00:19,  2.65it/s, v_num=1, train_loss_step=1.62e+7, train_loss_epoch=1.62e+7]Epoch 248/300:  83%|████████▎ | 248/300 [01:35<00:19,  2.64it/s, v_num=1, train_loss_step=1.62e+7, train_loss_epoch=1.62e+7]Epoch 248/300:  83%|████████▎ | 248/300 [01:35<00:19,  2.64it/s, v_num=1, train_loss_step=1.5e+7, 
train_loss_epoch=1.62e+7] Epoch 249/300:  83%|████████▎ | 248/300 [01:35<00:19,  2.64it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.62e+7]Epoch 249/300:  83%|████████▎ | 249/300 [01:35<00:19,  2.64it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.62e+7]Epoch 249/300:  83%|████████▎ | 249/300 [01:35<00:19,  2.64it/s, v_num=1, train_loss_step=1.47e+7, train_loss_epoch=1.61e+7]Epoch 250/300:  83%|████████▎ | 249/300 [01:35<00:19,  2.64it/s, v_num=1, train_loss_step=1.47e+7, train_loss_epoch=1.61e+7]Epoch 250/300:  83%|████████▎ | 250/300 [01:36<00:19,  2.63it/s, v_num=1, train_loss_step=1.47e+7, train_loss_epoch=1.61e+7]Epoch 250/300:  83%|████████▎ | 250/300 [01:36<00:19,  2.63it/s, v_num=1, train_loss_step=1.54e+7, train_loss_epoch=1.6e+7] Epoch 251/300:  83%|████████▎ | 250/300 [01:36<00:19,  2.63it/s, v_num=1, train_loss_step=1.54e+7, train_loss_epoch=1.6e+7]Epoch 251/300:  84%|████████▎ | 251/300 [01:36<00:18,  2.59it/s, v_num=1, train_loss_step=1.54e+7, train_loss_epoch=1.6e+7]Epoch 251/300:  84%|████████▎ | 251/300 [01:36<00:18,  2.59it/s, v_num=1, train_loss_step=1.47e+7, train_loss_epoch=1.59e+7]Epoch 252/300:  84%|████████▎ | 251/300 [01:36<00:18,  2.59it/s, v_num=1, train_loss_step=1.47e+7, train_loss_epoch=1.59e+7]Epoch 252/300:  84%|████████▍ | 252/300 [01:36<00:18,  2.58it/s, v_num=1, train_loss_step=1.47e+7, train_loss_epoch=1.59e+7]Epoch 252/300:  84%|████████▍ | 252/300 [01:36<00:18,  2.58it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.59e+7] Epoch 253/300:  84%|████████▍ | 252/300 [01:36<00:18,  2.58it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.59e+7]Epoch 253/300:  84%|████████▍ | 253/300 [01:37<00:18,  2.57it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.59e+7]Epoch 253/300:  84%|████████▍ | 253/300 [01:37<00:18,  2.57it/s, v_num=1, train_loss_step=1.52e+7, train_loss_epoch=1.58e+7]Epoch 254/300:  84%|████████▍ | 253/300 [01:37<00:18,  2.57it/s, v_num=1, train_loss_step=1.52e+7, 
train_loss_epoch=1.58e+7]Epoch 254/300:  85%|████████▍ | 254/300 [01:37<00:17,  2.58it/s, v_num=1, train_loss_step=1.52e+7, train_loss_epoch=1.58e+7]Epoch 254/300:  85%|████████▍ | 254/300 [01:37<00:17,  2.58it/s, v_num=1, train_loss_step=1.49e+7, train_loss_epoch=1.57e+7]Epoch 255/300:  85%|████████▍ | 254/300 [01:37<00:17,  2.58it/s, v_num=1, train_loss_step=1.49e+7, train_loss_epoch=1.57e+7]Epoch 255/300:  85%|████████▌ | 255/300 [01:37<00:17,  2.59it/s, v_num=1, train_loss_step=1.49e+7, train_loss_epoch=1.57e+7]Epoch 255/300:  85%|████████▌ | 255/300 [01:37<00:17,  2.59it/s, v_num=1, train_loss_step=1.35e+7, train_loss_epoch=1.56e+7]Epoch 256/300:  85%|████████▌ | 255/300 [01:37<00:17,  2.59it/s, v_num=1, train_loss_step=1.35e+7, train_loss_epoch=1.56e+7]Epoch 256/300:  85%|████████▌ | 256/300 [01:38<00:16,  2.61it/s, v_num=1, train_loss_step=1.35e+7, train_loss_epoch=1.56e+7]Epoch 256/300:  85%|████████▌ | 256/300 [01:38<00:16,  2.61it/s, v_num=1, train_loss_step=1.44e+7, train_loss_epoch=1.56e+7]Epoch 257/300:  85%|████████▌ | 256/300 [01:38<00:16,  2.61it/s, v_num=1, train_loss_step=1.44e+7, train_loss_epoch=1.56e+7]Epoch 257/300:  86%|████████▌ | 257/300 [01:38<00:16,  2.61it/s, v_num=1, train_loss_step=1.44e+7, train_loss_epoch=1.56e+7]Epoch 257/300:  86%|████████▌ | 257/300 [01:38<00:16,  2.61it/s, v_num=1, train_loss_step=1.52e+7, train_loss_epoch=1.55e+7]Epoch 258/300:  86%|████████▌ | 257/300 [01:38<00:16,  2.61it/s, v_num=1, train_loss_step=1.52e+7, train_loss_epoch=1.55e+7]Epoch 258/300:  86%|████████▌ | 258/300 [01:39<00:16,  2.62it/s, v_num=1, train_loss_step=1.52e+7, train_loss_epoch=1.55e+7]Epoch 258/300:  86%|████████▌ | 258/300 [01:39<00:16,  2.62it/s, v_num=1, train_loss_step=1.49e+7, train_loss_epoch=1.54e+7]Epoch 259/300:  86%|████████▌ | 258/300 [01:39<00:16,  2.62it/s, v_num=1, train_loss_step=1.49e+7, train_loss_epoch=1.54e+7]Epoch 259/300:  86%|████████▋ | 259/300 [01:39<00:15,  2.61it/s, v_num=1, train_loss_step=1.49e+7, 
train_loss_epoch=1.54e+7]Epoch 259/300:  86%|████████▋ | 259/300 [01:39<00:15,  2.61it/s, v_num=1, train_loss_step=1.36e+7, train_loss_epoch=1.54e+7]Epoch 260/300:  86%|████████▋ | 259/300 [01:39<00:15,  2.61it/s, v_num=1, train_loss_step=1.36e+7, train_loss_epoch=1.54e+7]Epoch 260/300:  87%|████████▋ | 260/300 [01:39<00:15,  2.61it/s, v_num=1, train_loss_step=1.36e+7, train_loss_epoch=1.54e+7]Epoch 260/300:  87%|████████▋ | 260/300 [01:39<00:15,  2.61it/s, v_num=1, train_loss_step=1.41e+7, train_loss_epoch=1.53e+7]Epoch 261/300:  87%|████████▋ | 260/300 [01:39<00:15,  2.61it/s, v_num=1, train_loss_step=1.41e+7, train_loss_epoch=1.53e+7]Epoch 261/300:  87%|████████▋ | 261/300 [01:40<00:14,  2.61it/s, v_num=1, train_loss_step=1.41e+7, train_loss_epoch=1.53e+7]Epoch 261/300:  87%|████████▋ | 261/300 [01:40<00:14,  2.61it/s, v_num=1, train_loss_step=1.43e+7, train_loss_epoch=1.52e+7]Epoch 262/300:  87%|████████▋ | 261/300 [01:40<00:14,  2.61it/s, v_num=1, train_loss_step=1.43e+7, train_loss_epoch=1.52e+7]Epoch 262/300:  87%|████████▋ | 262/300 [01:40<00:14,  2.63it/s, v_num=1, train_loss_step=1.43e+7, train_loss_epoch=1.52e+7]Epoch 262/300:  87%|████████▋ | 262/300 [01:40<00:14,  2.63it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.52e+7] Epoch 263/300:  87%|████████▋ | 262/300 [01:40<00:14,  2.63it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.52e+7]Epoch 263/300:  88%|████████▊ | 263/300 [01:41<00:14,  2.61it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.52e+7]Epoch 263/300:  88%|████████▊ | 263/300 [01:41<00:14,  2.61it/s, v_num=1, train_loss_step=1.51e+7, train_loss_epoch=1.51e+7]Epoch 264/300:  88%|████████▊ | 263/300 [01:41<00:14,  2.61it/s, v_num=1, train_loss_step=1.51e+7, train_loss_epoch=1.51e+7]Epoch 264/300:  88%|████████▊ | 264/300 [01:41<00:13,  2.60it/s, v_num=1, train_loss_step=1.51e+7, train_loss_epoch=1.51e+7]Epoch 264/300:  88%|████████▊ | 264/300 [01:41<00:13,  2.60it/s, v_num=1, train_loss_step=1.36e+7, 
train_loss_epoch=1.5e+7] Epoch 265/300:  88%|████████▊ | 264/300 [01:41<00:13,  2.60it/s, v_num=1, train_loss_step=1.36e+7, train_loss_epoch=1.5e+7]Epoch 265/300:  88%|████████▊ | 265/300 [01:41<00:13,  2.58it/s, v_num=1, train_loss_step=1.36e+7, train_loss_epoch=1.5e+7]Epoch 265/300:  88%|████████▊ | 265/300 [01:41<00:13,  2.58it/s, v_num=1, train_loss_step=1.48e+7, train_loss_epoch=1.5e+7]Epoch 266/300:  88%|████████▊ | 265/300 [01:41<00:13,  2.58it/s, v_num=1, train_loss_step=1.48e+7, train_loss_epoch=1.5e+7]Epoch 266/300:  89%|████████▊ | 266/300 [01:42<00:13,  2.57it/s, v_num=1, train_loss_step=1.48e+7, train_loss_epoch=1.5e+7]Epoch 266/300:  89%|████████▊ | 266/300 [01:42<00:13,  2.57it/s, v_num=1, train_loss_step=1.49e+7, train_loss_epoch=1.49e+7]Epoch 267/300:  89%|████████▊ | 266/300 [01:42<00:13,  2.57it/s, v_num=1, train_loss_step=1.49e+7, train_loss_epoch=1.49e+7]Epoch 267/300:  89%|████████▉ | 267/300 [01:42<00:12,  2.59it/s, v_num=1, train_loss_step=1.49e+7, train_loss_epoch=1.49e+7]Epoch 267/300:  89%|████████▉ | 267/300 [01:42<00:12,  2.59it/s, v_num=1, train_loss_step=1.55e+7, train_loss_epoch=1.49e+7]Epoch 268/300:  89%|████████▉ | 267/300 [01:42<00:12,  2.59it/s, v_num=1, train_loss_step=1.55e+7, train_loss_epoch=1.49e+7]Epoch 268/300:  89%|████████▉ | 268/300 [01:42<00:12,  2.59it/s, v_num=1, train_loss_step=1.55e+7, train_loss_epoch=1.49e+7]Epoch 268/300:  89%|████████▉ | 268/300 [01:42<00:12,  2.59it/s, v_num=1, train_loss_step=1.56e+7, train_loss_epoch=1.48e+7]Epoch 269/300:  89%|████████▉ | 268/300 [01:42<00:12,  2.59it/s, v_num=1, train_loss_step=1.56e+7, train_loss_epoch=1.48e+7]Epoch 269/300:  90%|████████▉ | 269/300 [01:43<00:11,  2.61it/s, v_num=1, train_loss_step=1.56e+7, train_loss_epoch=1.48e+7]Epoch 269/300:  90%|████████▉ | 269/300 [01:43<00:11,  2.61it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.47e+7] Epoch 270/300:  90%|████████▉ | 269/300 [01:43<00:11,  2.61it/s, v_num=1, train_loss_step=1.5e+7, 
train_loss_epoch=1.47e+7]Epoch 270/300:  90%|█████████ | 270/300 [01:43<00:11,  2.61it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.47e+7]Epoch 270/300:  90%|█████████ | 270/300 [01:43<00:11,  2.61it/s, v_num=1, train_loss_step=1.29e+7, train_loss_epoch=1.47e+7]Epoch 271/300:  90%|█████████ | 270/300 [01:43<00:11,  2.61it/s, v_num=1, train_loss_step=1.29e+7, train_loss_epoch=1.47e+7]Epoch 271/300:  90%|█████████ | 271/300 [01:44<00:11,  2.63it/s, v_num=1, train_loss_step=1.29e+7, train_loss_epoch=1.47e+7]Epoch 271/300:  90%|█████████ | 271/300 [01:44<00:11,  2.63it/s, v_num=1, train_loss_step=1.32e+7, train_loss_epoch=1.46e+7]Epoch 272/300:  90%|█████████ | 271/300 [01:44<00:11,  2.63it/s, v_num=1, train_loss_step=1.32e+7, train_loss_epoch=1.46e+7]Epoch 272/300:  91%|█████████ | 272/300 [01:44<00:10,  2.62it/s, v_num=1, train_loss_step=1.32e+7, train_loss_epoch=1.46e+7]Epoch 272/300:  91%|█████████ | 272/300 [01:44<00:10,  2.62it/s, v_num=1, train_loss_step=1.31e+7, train_loss_epoch=1.46e+7]Epoch 273/300:  91%|█████████ | 272/300 [01:44<00:10,  2.62it/s, v_num=1, train_loss_step=1.31e+7, train_loss_epoch=1.46e+7]Epoch 273/300:  91%|█████████ | 273/300 [01:44<00:10,  2.64it/s, v_num=1, train_loss_step=1.31e+7, train_loss_epoch=1.46e+7]Epoch 273/300:  91%|█████████ | 273/300 [01:44<00:10,  2.64it/s, v_num=1, train_loss_step=1.25e+7, train_loss_epoch=1.45e+7]Epoch 274/300:  91%|█████████ | 273/300 [01:44<00:10,  2.64it/s, v_num=1, train_loss_step=1.25e+7, train_loss_epoch=1.45e+7]Epoch 274/300:  91%|█████████▏| 274/300 [01:45<00:09,  2.63it/s, v_num=1, train_loss_step=1.25e+7, train_loss_epoch=1.45e+7]Epoch 274/300:  91%|█████████▏| 274/300 [01:45<00:09,  2.63it/s, v_num=1, train_loss_step=1.56e+7, train_loss_epoch=1.45e+7]Epoch 275/300:  91%|█████████▏| 274/300 [01:45<00:09,  2.63it/s, v_num=1, train_loss_step=1.56e+7, train_loss_epoch=1.45e+7]Epoch 275/300:  92%|█████████▏| 275/300 [01:45<00:09,  2.63it/s, v_num=1, train_loss_step=1.56e+7, 
train_loss_epoch=1.45e+7]Epoch 275/300:  92%|█████████▏| 275/300 [01:45<00:09,  2.63it/s, v_num=1, train_loss_step=1.39e+7, train_loss_epoch=1.44e+7]Epoch 276/300:  92%|█████████▏| 275/300 [01:45<00:09,  2.63it/s, v_num=1, train_loss_step=1.39e+7, train_loss_epoch=1.44e+7]Epoch 276/300:  92%|█████████▏| 276/300 [01:46<00:09,  2.62it/s, v_num=1, train_loss_step=1.39e+7, train_loss_epoch=1.44e+7]Epoch 276/300:  92%|█████████▏| 276/300 [01:46<00:09,  2.62it/s, v_num=1, train_loss_step=1.45e+7, train_loss_epoch=1.44e+7]Epoch 277/300:  92%|█████████▏| 276/300 [01:46<00:09,  2.62it/s, v_num=1, train_loss_step=1.45e+7, train_loss_epoch=1.44e+7]Epoch 277/300:  92%|█████████▏| 277/300 [01:46<00:08,  2.61it/s, v_num=1, train_loss_step=1.45e+7, train_loss_epoch=1.44e+7]Epoch 277/300:  92%|█████████▏| 277/300 [01:46<00:08,  2.61it/s, v_num=1, train_loss_step=1.32e+7, train_loss_epoch=1.43e+7]Epoch 278/300:  92%|█████████▏| 277/300 [01:46<00:08,  2.61it/s, v_num=1, train_loss_step=1.32e+7, train_loss_epoch=1.43e+7]Epoch 278/300:  93%|█████████▎| 278/300 [01:46<00:08,  2.61it/s, v_num=1, train_loss_step=1.32e+7, train_loss_epoch=1.43e+7]Epoch 278/300:  93%|█████████▎| 278/300 [01:46<00:08,  2.61it/s, v_num=1, train_loss_step=1.21e+7, train_loss_epoch=1.43e+7]Epoch 279/300:  93%|█████████▎| 278/300 [01:46<00:08,  2.61it/s, v_num=1, train_loss_step=1.21e+7, train_loss_epoch=1.43e+7]Epoch 279/300:  93%|█████████▎| 279/300 [01:47<00:08,  2.59it/s, v_num=1, train_loss_step=1.21e+7, train_loss_epoch=1.43e+7]Epoch 279/300:  93%|█████████▎| 279/300 [01:47<00:08,  2.59it/s, v_num=1, train_loss_step=1.42e+7, train_loss_epoch=1.42e+7]Epoch 280/300:  93%|█████████▎| 279/300 [01:47<00:08,  2.59it/s, v_num=1, train_loss_step=1.42e+7, train_loss_epoch=1.42e+7]Epoch 280/300:  93%|█████████▎| 280/300 [01:47<00:07,  2.57it/s, v_num=1, train_loss_step=1.42e+7, train_loss_epoch=1.42e+7]Epoch 280/300:  93%|█████████▎| 280/300 [01:47<00:07,  2.57it/s, v_num=1, train_loss_step=1.31e+7, 
train_loss_epoch=1.42e+7]Epoch 281/300:  93%|█████████▎| 280/300 [01:47<00:07,  2.57it/s, v_num=1, train_loss_step=1.31e+7, train_loss_epoch=1.42e+7]Epoch 281/300:  94%|█████████▎| 281/300 [01:47<00:07,  2.60it/s, v_num=1, train_loss_step=1.31e+7, train_loss_epoch=1.42e+7]Epoch 281/300:  94%|█████████▎| 281/300 [01:47<00:07,  2.60it/s, v_num=1, train_loss_step=1.4e+7, train_loss_epoch=1.41e+7] Epoch 282/300:  94%|█████████▎| 281/300 [01:47<00:07,  2.60it/s, v_num=1, train_loss_step=1.4e+7, train_loss_epoch=1.41e+7]Epoch 282/300:  94%|█████████▍| 282/300 [01:48<00:06,  2.61it/s, v_num=1, train_loss_step=1.4e+7, train_loss_epoch=1.41e+7]Epoch 282/300:  94%|█████████▍| 282/300 [01:48<00:06,  2.61it/s, v_num=1, train_loss_step=1.33e+7, train_loss_epoch=1.41e+7]Epoch 283/300:  94%|█████████▍| 282/300 [01:48<00:06,  2.61it/s, v_num=1, train_loss_step=1.33e+7, train_loss_epoch=1.41e+7]Epoch 283/300:  94%|█████████▍| 283/300 [01:48<00:06,  2.61it/s, v_num=1, train_loss_step=1.33e+7, train_loss_epoch=1.41e+7]Epoch 283/300:  94%|█████████▍| 283/300 [01:48<00:06,  2.61it/s, v_num=1, train_loss_step=1.32e+7, train_loss_epoch=1.4e+7] Epoch 284/300:  94%|█████████▍| 283/300 [01:48<00:06,  2.61it/s, v_num=1, train_loss_step=1.32e+7, train_loss_epoch=1.4e+7]Epoch 284/300:  95%|█████████▍| 284/300 [01:49<00:06,  2.62it/s, v_num=1, train_loss_step=1.32e+7, train_loss_epoch=1.4e+7]Epoch 284/300:  95%|█████████▍| 284/300 [01:49<00:06,  2.62it/s, v_num=1, train_loss_step=1.2e+7, train_loss_epoch=1.4e+7] Epoch 285/300:  95%|█████████▍| 284/300 [01:49<00:06,  2.62it/s, v_num=1, train_loss_step=1.2e+7, train_loss_epoch=1.4e+7]Epoch 285/300:  95%|█████████▌| 285/300 [01:49<00:05,  2.61it/s, v_num=1, train_loss_step=1.2e+7, train_loss_epoch=1.4e+7]Epoch 285/300:  95%|█████████▌| 285/300 [01:49<00:05,  2.61it/s, v_num=1, train_loss_step=1.41e+7, train_loss_epoch=1.39e+7]Epoch 286/300:  95%|█████████▌| 285/300 [01:49<00:05,  2.61it/s, v_num=1, train_loss_step=1.41e+7, 
train_loss_epoch=1.39e+7]Epoch 286/300:  95%|█████████▌| 286/300 [01:49<00:05,  2.62it/s, v_num=1, train_loss_step=1.41e+7, train_loss_epoch=1.39e+7]Epoch 286/300:  95%|█████████▌| 286/300 [01:49<00:05,  2.62it/s, v_num=1, train_loss_step=1.29e+7, train_loss_epoch=1.39e+7]Epoch 287/300:  95%|█████████▌| 286/300 [01:49<00:05,  2.62it/s, v_num=1, train_loss_step=1.29e+7, train_loss_epoch=1.39e+7]Epoch 287/300:  96%|█████████▌| 287/300 [01:50<00:04,  2.62it/s, v_num=1, train_loss_step=1.29e+7, train_loss_epoch=1.39e+7]Epoch 287/300:  96%|█████████▌| 287/300 [01:50<00:04,  2.62it/s, v_num=1, train_loss_step=1.36e+7, train_loss_epoch=1.38e+7]Epoch 288/300:  96%|█████████▌| 287/300 [01:50<00:04,  2.62it/s, v_num=1, train_loss_step=1.36e+7, train_loss_epoch=1.38e+7]Epoch 288/300:  96%|█████████▌| 288/300 [01:50<00:04,  2.62it/s, v_num=1, train_loss_step=1.36e+7, train_loss_epoch=1.38e+7]Epoch 288/300:  96%|█████████▌| 288/300 [01:50<00:04,  2.62it/s, v_num=1, train_loss_step=1.43e+7, train_loss_epoch=1.38e+7]Epoch 289/300:  96%|█████████▌| 288/300 [01:50<00:04,  2.62it/s, v_num=1, train_loss_step=1.43e+7, train_loss_epoch=1.38e+7]Epoch 289/300:  96%|█████████▋| 289/300 [01:51<00:04,  2.62it/s, v_num=1, train_loss_step=1.43e+7, train_loss_epoch=1.38e+7]Epoch 289/300:  96%|█████████▋| 289/300 [01:51<00:04,  2.62it/s, v_num=1, train_loss_step=1.57e+7, train_loss_epoch=1.38e+7]Epoch 290/300:  96%|█████████▋| 289/300 [01:51<00:04,  2.62it/s, v_num=1, train_loss_step=1.57e+7, train_loss_epoch=1.38e+7]Epoch 290/300:  97%|█████████▋| 290/300 [01:51<00:03,  2.62it/s, v_num=1, train_loss_step=1.57e+7, train_loss_epoch=1.38e+7]Epoch 290/300:  97%|█████████▋| 290/300 [01:51<00:03,  2.62it/s, v_num=1, train_loss_step=1.46e+7, train_loss_epoch=1.37e+7]Epoch 291/300:  97%|█████████▋| 290/300 [01:51<00:03,  2.62it/s, v_num=1, train_loss_step=1.46e+7, train_loss_epoch=1.37e+7]Epoch 291/300:  97%|█████████▋| 291/300 [01:51<00:03,  2.61it/s, v_num=1, train_loss_step=1.46e+7, 
train_loss_epoch=1.37e+7]Epoch 291/300:  97%|█████████▋| 291/300 [01:51<00:03,  2.61it/s, v_num=1, train_loss_step=1.52e+7, train_loss_epoch=1.37e+7]Epoch 292/300:  97%|█████████▋| 291/300 [01:51<00:03,  2.61it/s, v_num=1, train_loss_step=1.52e+7, train_loss_epoch=1.37e+7]Epoch 292/300:  97%|█████████▋| 292/300 [01:52<00:03,  2.60it/s, v_num=1, train_loss_step=1.52e+7, train_loss_epoch=1.37e+7]Epoch 292/300:  97%|█████████▋| 292/300 [01:52<00:03,  2.60it/s, v_num=1, train_loss_step=1.29e+7, train_loss_epoch=1.36e+7]Epoch 293/300:  97%|█████████▋| 292/300 [01:52<00:03,  2.60it/s, v_num=1, train_loss_step=1.29e+7, train_loss_epoch=1.36e+7]Epoch 293/300:  98%|█████████▊| 293/300 [01:52<00:02,  2.60it/s, v_num=1, train_loss_step=1.29e+7, train_loss_epoch=1.36e+7]Epoch 293/300:  98%|█████████▊| 293/300 [01:52<00:02,  2.60it/s, v_num=1, train_loss_step=1.27e+7, train_loss_epoch=1.36e+7]Epoch 294/300:  98%|█████████▊| 293/300 [01:52<00:02,  2.60it/s, v_num=1, train_loss_step=1.27e+7, train_loss_epoch=1.36e+7]Epoch 294/300:  98%|█████████▊| 294/300 [01:52<00:02,  2.59it/s, v_num=1, train_loss_step=1.27e+7, train_loss_epoch=1.36e+7]Epoch 294/300:  98%|█████████▊| 294/300 [01:52<00:02,  2.59it/s, v_num=1, train_loss_step=1.24e+7, train_loss_epoch=1.36e+7]Epoch 295/300:  98%|█████████▊| 294/300 [01:52<00:02,  2.59it/s, v_num=1, train_loss_step=1.24e+7, train_loss_epoch=1.36e+7]Epoch 295/300:  98%|█████████▊| 295/300 [01:53<00:01,  2.59it/s, v_num=1, train_loss_step=1.24e+7, train_loss_epoch=1.36e+7]Epoch 295/300:  98%|█████████▊| 295/300 [01:53<00:01,  2.59it/s, v_num=1, train_loss_step=1.34e+7, train_loss_epoch=1.35e+7]Epoch 296/300:  98%|█████████▊| 295/300 [01:53<00:01,  2.59it/s, v_num=1, train_loss_step=1.34e+7, train_loss_epoch=1.35e+7]Epoch 296/300:  99%|█████████▊| 296/300 [01:53<00:01,  2.60it/s, v_num=1, train_loss_step=1.34e+7, train_loss_epoch=1.35e+7]Epoch 296/300:  99%|█████████▊| 296/300 [01:53<00:01,  2.60it/s, v_num=1, train_loss_step=1.28e+7, 
train_loss_epoch=1.35e+7]Epoch 297/300:  99%|█████████▊| 296/300 [01:53<00:01,  2.60it/s, v_num=1, train_loss_step=1.28e+7, train_loss_epoch=1.35e+7]Epoch 297/300:  99%|█████████▉| 297/300 [01:54<00:01,  2.62it/s, v_num=1, train_loss_step=1.28e+7, train_loss_epoch=1.35e+7]Epoch 297/300:  99%|█████████▉| 297/300 [01:54<00:01,  2.62it/s, v_num=1, train_loss_step=1.24e+7, train_loss_epoch=1.34e+7]Epoch 298/300:  99%|█████████▉| 297/300 [01:54<00:01,  2.62it/s, v_num=1, train_loss_step=1.24e+7, train_loss_epoch=1.34e+7]Epoch 298/300:  99%|█████████▉| 298/300 [01:54<00:00,  2.62it/s, v_num=1, train_loss_step=1.24e+7, train_loss_epoch=1.34e+7]Epoch 298/300:  99%|█████████▉| 298/300 [01:54<00:00,  2.62it/s, v_num=1, train_loss_step=1.24e+7, train_loss_epoch=1.34e+7]Epoch 299/300:  99%|█████████▉| 298/300 [01:54<00:00,  2.62it/s, v_num=1, train_loss_step=1.24e+7, train_loss_epoch=1.34e+7]Epoch 299/300: 100%|█████████▉| 299/300 [01:54<00:00,  2.62it/s, v_num=1, train_loss_step=1.24e+7, train_loss_epoch=1.34e+7]Epoch 299/300: 100%|█████████▉| 299/300 [01:54<00:00,  2.62it/s, v_num=1, train_loss_step=1.26e+7, train_loss_epoch=1.34e+7]Epoch 300/300: 100%|█████████▉| 299/300 [01:54<00:00,  2.62it/s, v_num=1, train_loss_step=1.26e+7, train_loss_epoch=1.34e+7]Epoch 300/300: 100%|██████████| 300/300 [01:55<00:00,  2.62it/s, v_num=1, train_loss_step=1.26e+7, train_loss_epoch=1.34e+7]Epoch 300/300: 100%|██████████| 300/300 [01:55<00:00,  2.62it/s, v_num=1, train_loss_step=1.37e+7, train_loss_epoch=1.33e+7]Epoch 300/300: 100%|██████████| 300/300 [01:55<00:00,  2.60it/s, v_num=1, train_loss_step=1.37e+7, train_loss_epoch=1.33e+7]
    +
    +
    +
    +
    +

    +
    +
    +
    +
    + +
    +

    6.3 Predict proportions on the spatial data

    First create a new st object with the correct genes and counts as a layer.

    -
    # this chunk has issues and therefore not evaluated
    +
    st_adata = adata_anterior_subset.copy()
     
    -st_adata = adata_anterior_subset.copy()
    -
    -st_adata.layers["counts"] = st_adata.X.copy()
    -st_adata = st_adata[:, deg].copy()
    -
    -SpatialStereoscope.setup_anndata(st_adata, layer="counts")
    +st_adata.layers["counts"] = st_adata.X.copy() +st_adata = st_adata[:, deg].copy() + +SpatialStereoscope.setup_anndata(st_adata, layer="counts")
    -
    # this chunk has issues and therefore not evaluated
    -
    -train=True
    -if train:
    -    spatial_model = SpatialStereoscope.from_rna_model(st_adata, sc_model)
    -    spatial_model.train(max_epochs = 3000)
    -    spatial_model.history["elbo_train"][10:].plot()
    -    spatial_model.save("./data/spatial/stmodel", overwrite = True)
    -else:
    -    spatial_model = SpatialStereoscope.load("./data/spatial/stmodel", st_adata)
    -    print("Loaded Spatial model from file!")
    +
    train=True
    +if train:
    +    spatial_model = SpatialStereoscope.from_rna_model(st_adata, sc_model)
    +    spatial_model.train(max_epochs = 3000)
    +    spatial_model.history["elbo_train"][10:].plot()
    +    spatial_model.save("./data/spatial/stmodel", overwrite = True)
    +else:
    +    spatial_model = SpatialStereoscope.load("./data/spatial/stmodel", st_adata)
    +    print("Loaded Spatial model from file!")
    +
    +
    Training:   0%|          | 0/3000 [00:00<?, ?it/s]Epoch 1/3000:   0%|          | 0/3000 [00:00<?, ?it/s]Epoch 1/3000:   0%|          | 1/3000 [00:00<26:27,  1.89it/s]Epoch 1/3000:   0%|          | 1/3000 [00:00<26:27,  1.89it/s, v_num=1, train_loss_step=1.54e+7, train_loss_epoch=1.67e+7]Epoch 2/3000:   0%|          | 1/3000 [00:00<26:27,  1.89it/s, v_num=1, train_loss_step=1.54e+7, train_loss_epoch=1.67e+7]Epoch 2/3000:   0%|          | 2/3000 [00:01<26:05,  1.92it/s, v_num=1, train_loss_step=1.54e+7, train_loss_epoch=1.67e+7]Epoch 2/3000:   0%|          | 2/3000 [00:01<26:05,  1.92it/s, v_num=1, train_loss_step=1.69e+7, train_loss_epoch=1.66e+7]Epoch 3/3000:   0%|          | 2/3000 [00:01<26:05,  1.92it/s, v_num=1, train_loss_step=1.69e+7, train_loss_epoch=1.66e+7]Epoch 3/3000:   0%|          | 3/3000 [00:01<23:55,  2.09it/s, v_num=1, train_loss_step=1.69e+7, train_loss_epoch=1.66e+7]Epoch 3/3000:   0%|          | 3/3000 [00:01<23:55,  2.09it/s, v_num=1, train_loss_step=1.67e+7, train_loss_epoch=1.65e+7]Epoch 4/3000:   0%|          | 3/3000 [00:01<23:55,  2.09it/s, v_num=1, train_loss_step=1.67e+7, train_loss_epoch=1.65e+7]Epoch 4/3000:   0%|          | 4/3000 [00:02<25:02,  1.99it/s, v_num=1, train_loss_step=1.67e+7, train_loss_epoch=1.65e+7]Epoch 4/3000:   0%|          | 4/3000 [00:02<25:02,  1.99it/s, v_num=1, train_loss_step=1.59e+7, train_loss_epoch=1.64e+7]Epoch 5/3000:   0%|          | 4/3000 [00:02<25:02,  1.99it/s, v_num=1, train_loss_step=1.59e+7, train_loss_epoch=1.64e+7]Epoch 5/3000:   0%|          | 5/3000 [00:02<24:15,  2.06it/s, v_num=1, train_loss_step=1.59e+7, train_loss_epoch=1.64e+7]Epoch 5/3000:   0%|          | 5/3000 [00:02<24:15,  2.06it/s, v_num=1, train_loss_step=1.59e+7, train_loss_epoch=1.63e+7]Epoch 6/3000:   0%|          | 5/3000 [00:02<24:15,  2.06it/s, v_num=1, train_loss_step=1.59e+7, train_loss_epoch=1.63e+7]Epoch 6/3000:   0%|          | 6/3000 [00:02<24:26,  2.04it/s, v_num=1, train_loss_step=1.59e+7, 
train_loss_epoch=1.63e+7]Epoch 6/3000:   0%|          | 6/3000 [00:02<24:26,  2.04it/s, v_num=1, train_loss_step=1.69e+7, train_loss_epoch=1.63e+7]Epoch 7/3000:   0%|          | 6/3000 [00:02<24:26,  2.04it/s, v_num=1, train_loss_step=1.69e+7, train_loss_epoch=1.63e+7]Epoch 7/3000:   0%|          | 7/3000 [00:03<24:00,  2.08it/s, v_num=1, train_loss_step=1.69e+7, train_loss_epoch=1.63e+7]Epoch 7/3000:   0%|          | 7/3000 [00:03<24:00,  2.08it/s, v_num=1, train_loss_step=1.64e+7, train_loss_epoch=1.62e+7]Epoch 8/3000:   0%|          | 7/3000 [00:03<24:00,  2.08it/s, v_num=1, train_loss_step=1.64e+7, train_loss_epoch=1.62e+7]Epoch 8/3000:   0%|          | 8/3000 [00:03<25:09,  1.98it/s, v_num=1, train_loss_step=1.64e+7, train_loss_epoch=1.62e+7]Epoch 8/3000:   0%|          | 8/3000 [00:03<25:09,  1.98it/s, v_num=1, train_loss_step=1.68e+7, train_loss_epoch=1.61e+7]Epoch 9/3000:   0%|          | 8/3000 [00:03<25:09,  1.98it/s, v_num=1, train_loss_step=1.68e+7, train_loss_epoch=1.61e+7]Epoch 9/3000:   0%|          | 9/3000 [00:04<25:25,  1.96it/s, v_num=1, train_loss_step=1.68e+7, train_loss_epoch=1.61e+7]Epoch 9/3000:   0%|          | 9/3000 [00:04<25:25,  1.96it/s, v_num=1, train_loss_step=1.63e+7, train_loss_epoch=1.6e+7] Epoch 10/3000:   0%|          | 9/3000 [00:04<25:25,  1.96it/s, v_num=1, train_loss_step=1.63e+7, train_loss_epoch=1.6e+7]Epoch 10/3000:   0%|          | 10/3000 [00:04<24:25,  2.04it/s, v_num=1, train_loss_step=1.63e+7, train_loss_epoch=1.6e+7]Epoch 10/3000:   0%|          | 10/3000 [00:04<24:25,  2.04it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.59e+7]Epoch 11/3000:   0%|          | 10/3000 [00:04<24:25,  2.04it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.59e+7]Epoch 11/3000:   0%|          | 11/3000 [00:05<23:21,  2.13it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.59e+7]Epoch 11/3000:   0%|          | 11/3000 [00:05<23:21,  2.13it/s, v_num=1, train_loss_step=1.67e+7, train_loss_epoch=1.59e+7]Epoch 12/3000:   
0%|          | 11/3000 [00:05<23:21,  2.13it/s, v_num=1, train_loss_step=1.67e+7, train_loss_epoch=1.59e+7]Epoch 12/3000:   0%|          | 12/3000 [00:05<23:27,  2.12it/s, v_num=1, train_loss_step=1.67e+7, train_loss_epoch=1.59e+7]Epoch 12/3000:   0%|          | 12/3000 [00:05<23:27,  2.12it/s, v_num=1, train_loss_step=1.67e+7, train_loss_epoch=1.58e+7]Epoch 13/3000:   0%|          | 12/3000 [00:05<23:27,  2.12it/s, v_num=1, train_loss_step=1.67e+7, train_loss_epoch=1.58e+7]Epoch 13/3000:   0%|          | 13/3000 [00:06<24:05,  2.07it/s, v_num=1, train_loss_step=1.67e+7, train_loss_epoch=1.58e+7]Epoch 13/3000:   0%|          | 13/3000 [00:06<24:05,  2.07it/s, v_num=1, train_loss_step=1.58e+7, train_loss_epoch=1.57e+7]Epoch 14/3000:   0%|          | 13/3000 [00:06<24:05,  2.07it/s, v_num=1, train_loss_step=1.58e+7, train_loss_epoch=1.57e+7]Epoch 14/3000:   0%|          | 14/3000 [00:06<23:22,  2.13it/s, v_num=1, train_loss_step=1.58e+7, train_loss_epoch=1.57e+7]Epoch 14/3000:   0%|          | 14/3000 [00:06<23:22,  2.13it/s, v_num=1, train_loss_step=1.68e+7, train_loss_epoch=1.56e+7]Epoch 15/3000:   0%|          | 14/3000 [00:06<23:22,  2.13it/s, v_num=1, train_loss_step=1.68e+7, train_loss_epoch=1.56e+7]Epoch 15/3000:   0%|          | 15/3000 [00:07<23:12,  2.14it/s, v_num=1, train_loss_step=1.68e+7, train_loss_epoch=1.56e+7]Epoch 15/3000:   0%|          | 15/3000 [00:07<23:12,  2.14it/s, v_num=1, train_loss_step=1.63e+7, train_loss_epoch=1.55e+7]Epoch 16/3000:   0%|          | 15/3000 [00:07<23:12,  2.14it/s, v_num=1, train_loss_step=1.63e+7, train_loss_epoch=1.55e+7]Epoch 16/3000:   1%|          | 16/3000 [00:07<23:08,  2.15it/s, v_num=1, train_loss_step=1.63e+7, train_loss_epoch=1.55e+7]Epoch 16/3000:   1%|          | 16/3000 [00:07<23:08,  2.15it/s, v_num=1, train_loss_step=1.47e+7, train_loss_epoch=1.54e+7]Epoch 17/3000:   1%|          | 16/3000 [00:07<23:08,  2.15it/s, v_num=1, train_loss_step=1.47e+7, train_loss_epoch=1.54e+7]Epoch 17/3000:   1%|          | 
17/3000 [00:08<21:25,  2.32it/s, v_num=1, train_loss_step=1.47e+7, train_loss_epoch=1.54e+7]Epoch 17/3000:   1%|          | 17/3000 [00:08<21:25,  2.32it/s, v_num=1, train_loss_step=1.58e+7, train_loss_epoch=1.54e+7]Epoch 18/3000:   1%|          | 17/3000 [00:08<21:25,  2.32it/s, v_num=1, train_loss_step=1.58e+7, train_loss_epoch=1.54e+7]Epoch 18/3000:   1%|          | 18/3000 [00:08<20:50,  2.38it/s, v_num=1, train_loss_step=1.58e+7, train_loss_epoch=1.54e+7]Epoch 18/3000:   1%|          | 18/3000 [00:08<20:50,  2.38it/s, v_num=1, train_loss_step=1.49e+7, train_loss_epoch=1.53e+7]Epoch 19/3000:   1%|          | 18/3000 [00:08<20:50,  2.38it/s, v_num=1, train_loss_step=1.49e+7, train_loss_epoch=1.53e+7]Epoch 19/3000:   1%|          | 19/3000 [00:08<19:56,  2.49it/s, v_num=1, train_loss_step=1.49e+7, train_loss_epoch=1.53e+7]Epoch 19/3000:   1%|          | 19/3000 [00:08<19:56,  2.49it/s, v_num=1, train_loss_step=1.46e+7, train_loss_epoch=1.52e+7]Epoch 20/3000:   1%|          | 19/3000 [00:08<19:56,  2.49it/s, v_num=1, train_loss_step=1.46e+7, train_loss_epoch=1.52e+7]Epoch 20/3000:   1%|          | 20/3000 [00:09<21:17,  2.33it/s, v_num=1, train_loss_step=1.46e+7, train_loss_epoch=1.52e+7]Epoch 20/3000:   1%|          | 20/3000 [00:09<21:17,  2.33it/s, v_num=1, train_loss_step=1.53e+7, train_loss_epoch=1.51e+7]Epoch 21/3000:   1%|          | 20/3000 [00:09<21:17,  2.33it/s, v_num=1, train_loss_step=1.53e+7, train_loss_epoch=1.51e+7]Epoch 21/3000:   1%|          | 21/3000 [00:09<20:13,  2.45it/s, v_num=1, train_loss_step=1.53e+7, train_loss_epoch=1.51e+7]Epoch 21/3000:   1%|          | 21/3000 [00:09<20:13,  2.45it/s, v_num=1, train_loss_step=1.56e+7, train_loss_epoch=1.5e+7] Epoch 22/3000:   1%|          | 21/3000 [00:09<20:13,  2.45it/s, v_num=1, train_loss_step=1.56e+7, train_loss_epoch=1.5e+7]Epoch 22/3000:   1%|          | 22/3000 [00:10<21:27,  2.31it/s, v_num=1, train_loss_step=1.56e+7, train_loss_epoch=1.5e+7]Epoch 22/3000:   1%|          | 22/3000 
[00:10<21:27,  2.31it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.49e+7]Epoch 23/3000:   1%|          | 22/3000 [00:10<21:27,  2.31it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.49e+7]Epoch 23/3000:   1%|          | 23/3000 [00:10<20:39,  2.40it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.49e+7]Epoch 23/3000:   1%|          | 23/3000 [00:10<20:39,  2.40it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.48e+7]Epoch 24/3000:   1%|          | 23/3000 [00:10<20:39,  2.40it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.48e+7]Epoch 24/3000:   1%|          | 24/3000 [00:10<20:21,  2.44it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.48e+7]Epoch 24/3000:   1%|          | 24/3000 [00:10<20:21,  2.44it/s, v_num=1, train_loss_step=1.46e+7, train_loss_epoch=1.47e+7]Epoch 25/3000:   1%|          | 24/3000 [00:10<20:21,  2.44it/s, v_num=1, train_loss_step=1.46e+7, train_loss_epoch=1.47e+7]Epoch 25/3000:   1%|          | 25/3000 [00:11<22:08,  2.24it/s, v_num=1, train_loss_step=1.46e+7, train_loss_epoch=1.47e+7]Epoch 25/3000:   1%|          | 25/3000 [00:11<22:08,  2.24it/s, v_num=1, train_loss_step=1.36e+7, train_loss_epoch=1.47e+7]Epoch 26/3000:   1%|          | 25/3000 [00:11<22:08,  2.24it/s, v_num=1, train_loss_step=1.36e+7, train_loss_epoch=1.47e+7]Epoch 26/3000:   1%|          | 26/3000 [00:11<21:20,  2.32it/s, v_num=1, train_loss_step=1.36e+7, train_loss_epoch=1.47e+7]Epoch 26/3000:   1%|          | 26/3000 [00:11<21:20,  2.32it/s, v_num=1, train_loss_step=1.49e+7, train_loss_epoch=1.46e+7]Epoch 27/3000:   1%|          | 26/3000 [00:11<21:20,  2.32it/s, v_num=1, train_loss_step=1.49e+7, train_loss_epoch=1.46e+7]Epoch 27/3000:   1%|          | 27/3000 [00:12<21:25,  2.31it/s, v_num=1, train_loss_step=1.49e+7, train_loss_epoch=1.46e+7]Epoch 27/3000:   1%|          | 27/3000 [00:12<21:25,  2.31it/s, v_num=1, train_loss_step=1.37e+7, train_loss_epoch=1.45e+7]Epoch 28/3000:   1%|          | 27/3000 [00:12<21:25,  
2.31it/s, v_num=1, train_loss_step=1.37e+7, train_loss_epoch=1.45e+7]Epoch 28/3000:   1%|          | 28/3000 [00:12<22:40,  2.18it/s, v_num=1, train_loss_step=1.37e+7, train_loss_epoch=1.45e+7]Epoch 28/3000:   1%|          | 28/3000 [00:12<22:40,  2.18it/s, v_num=1, train_loss_step=1.46e+7, train_loss_epoch=1.44e+7]Epoch 29/3000:   1%|          | 28/3000 [00:12<22:40,  2.18it/s, v_num=1, train_loss_step=1.46e+7, train_loss_epoch=1.44e+7]Epoch 29/3000:   1%|          | 29/3000 [00:13<23:30,  2.11it/s, v_num=1, train_loss_step=1.46e+7, train_loss_epoch=1.44e+7]Epoch 29/3000:   1%|          | 29/3000 [00:13<23:30,  2.11it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.43e+7] Epoch 30/3000:   1%|          | 29/3000 [00:13<23:30,  2.11it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.43e+7]Epoch 30/3000:   1%|          | 30/3000 [00:13<23:18,  2.12it/s, v_num=1, train_loss_step=1.5e+7, train_loss_epoch=1.43e+7]Epoch 30/3000:   1%|          | 30/3000 [00:13<23:18,  2.12it/s, v_num=1, train_loss_step=1.43e+7, train_loss_epoch=1.42e+7]Epoch 31/3000:   1%|          | 30/3000 [00:13<23:18,  2.12it/s, v_num=1, train_loss_step=1.43e+7, train_loss_epoch=1.42e+7]Epoch 31/3000:   1%|          | 31/3000 [00:14<24:17,  2.04it/s, v_num=1, train_loss_step=1.43e+7, train_loss_epoch=1.42e+7]Epoch 31/3000:   1%|          | 31/3000 [00:14<24:17,  2.04it/s, v_num=1, train_loss_step=1.37e+7, train_loss_epoch=1.41e+7]Epoch 32/3000:   1%|          | 31/3000 [00:14<24:17,  2.04it/s, v_num=1, train_loss_step=1.37e+7, train_loss_epoch=1.41e+7]Epoch 32/3000:   1%|          | 32/3000 [00:14<24:30,  2.02it/s, v_num=1, train_loss_step=1.37e+7, train_loss_epoch=1.41e+7]Epoch 32/3000:   1%|          | 32/3000 [00:14<24:30,  2.02it/s, v_num=1, train_loss_step=1.44e+7, train_loss_epoch=1.4e+7] Epoch 33/3000:   1%|          | 32/3000 [00:14<24:30,  2.02it/s, v_num=1, train_loss_step=1.44e+7, train_loss_epoch=1.4e+7]Epoch 33/3000:   1%|          | 33/3000 [00:15<24:09,  2.05it/s, v_num=1, 
train_loss_step=1.44e+7, train_loss_epoch=1.4e+7]Epoch 33/3000:   1%|          | 33/3000 [00:15<24:09,  2.05it/s, v_num=1, train_loss_step=1.41e+7, train_loss_epoch=1.4e+7]Epoch 34/3000:   1%|          | 33/3000 [00:15<24:09,  2.05it/s, v_num=1, train_loss_step=1.41e+7, train_loss_epoch=1.4e+7]Epoch 34/3000:   1%|          | 34/3000 [00:15<23:56,  2.07it/s, v_num=1, train_loss_step=1.41e+7, train_loss_epoch=1.4e+7]Epoch 34/3000:   1%|          | 34/3000 [00:15<23:56,  2.07it/s, v_num=1, train_loss_step=1.38e+7, train_loss_epoch=1.39e+7]Epoch 35/3000:   1%|          | 34/3000 [00:15<23:56,  2.07it/s, v_num=1, train_loss_step=1.38e+7, train_loss_epoch=1.39e+7]Epoch 35/3000:   1%|          | 35/3000 [00:16<23:27,  2.11it/s, v_num=1, train_loss_step=1.38e+7, train_loss_epoch=1.39e+7]Epoch 35/3000:   1%|          | 35/3000 [00:16<23:27,  2.11it/s, v_num=1, train_loss_step=1.26e+7, train_loss_epoch=1.38e+7]Epoch 36/3000:   1%|          | 35/3000 [00:16<23:27,  2.11it/s, v_num=1, train_loss_step=1.26e+7, train_loss_epoch=1.38e+7]Epoch 36/3000:   1%|          | 36/3000 [00:16<22:14,  2.22it/s, v_num=1, train_loss_step=1.26e+7, train_loss_epoch=1.38e+7]Epoch 36/3000:   1%|          | 36/3000 [00:16<22:14,  2.22it/s, v_num=1, train_loss_step=1.29e+7, train_loss_epoch=1.37e+7]Epoch 37/3000:   1%|          | 36/3000 [00:16<22:14,  2.22it/s, v_num=1, train_loss_step=1.29e+7, train_loss_epoch=1.37e+7]Epoch 37/3000:   1%|          | 37/3000 [00:17<23:21,  2.11it/s, v_num=1, train_loss_step=1.29e+7, train_loss_epoch=1.37e+7]Epoch 37/3000:   1%|          | 37/3000 [00:17<23:21,  2.11it/s, v_num=1, train_loss_step=1.3e+7, train_loss_epoch=1.36e+7] Epoch 38/3000:   1%|          | 37/3000 [00:17<23:21,  2.11it/s, v_num=1, train_loss_step=1.3e+7, train_loss_epoch=1.36e+7]Epoch 38/3000:   1%|▏         | 38/3000 [00:17<23:32,  2.10it/s, v_num=1, train_loss_step=1.3e+7, train_loss_epoch=1.36e+7]Epoch 38/3000:   1%|▏         | 38/3000 [00:17<23:32,  2.10it/s, v_num=1, 
train_loss_step=1.48e+7, train_loss_epoch=1.35e+7]Epoch 39/3000:   1%|▏         | 38/3000 [00:17<23:32,  2.10it/s, v_num=1, train_loss_step=1.48e+7, train_loss_epoch=1.35e+7]Epoch 39/3000:   1%|▏         | 39/3000 [00:18<24:00,  2.06it/s, v_num=1, train_loss_step=1.48e+7, train_loss_epoch=1.35e+7]Epoch 39/3000:   1%|▏         | 39/3000 [00:18<24:00,  2.06it/s, v_num=1, train_loss_step=1.44e+7, train_loss_epoch=1.34e+7]Epoch 40/3000:   1%|▏         | 39/3000 [00:18<24:00,  2.06it/s, v_num=1, train_loss_step=1.44e+7, train_loss_epoch=1.34e+7]Epoch 40/3000:   1%|▏         | 40/3000 [00:18<25:15,  1.95it/s, v_num=1, train_loss_step=1.44e+7, train_loss_epoch=1.34e+7]Epoch 40/3000:   1%|▏         | 40/3000 [00:18<25:15,  1.95it/s, v_num=1, train_loss_step=1.4e+7, train_loss_epoch=1.34e+7] Epoch 41/3000:   1%|▏         | 40/3000 [00:18<25:15,  1.95it/s, v_num=1, train_loss_step=1.4e+7, train_loss_epoch=1.34e+7]Epoch 41/3000:   1%|▏         | 41/3000 [00:19<24:37,  2.00it/s, v_num=1, train_loss_step=1.4e+7, train_loss_epoch=1.34e+7]Epoch 41/3000:   1%|▏         | 41/3000 [00:19<24:37,  2.00it/s, v_num=1, train_loss_step=1.41e+7, train_loss_epoch=1.33e+7]Epoch 42/3000:   1%|▏         | 41/3000 [00:19<24:37,  2.00it/s, v_num=1, train_loss_step=1.41e+7, train_loss_epoch=1.33e+7]Epoch 42/3000:   1%|▏         | 42/3000 [00:19<23:34,  2.09it/s, v_num=1, train_loss_step=1.41e+7, train_loss_epoch=1.33e+7]Epoch 42/3000:   1%|▏         | 42/3000 [00:19<23:34,  2.09it/s, v_num=1, train_loss_step=1.36e+7, train_loss_epoch=1.32e+7]Epoch 43/3000:   1%|▏         | 42/3000 [00:19<23:34,  2.09it/s, v_num=1, train_loss_step=1.36e+7, train_loss_epoch=1.32e+7]Epoch 43/3000:   1%|▏         | 43/3000 [00:20<22:47,  2.16it/s, v_num=1, train_loss_step=1.36e+7, train_loss_epoch=1.32e+7]Epoch 43/3000:   1%|▏         | 43/3000 [00:20<22:47,  2.16it/s, v_num=1, train_loss_step=1.32e+7, train_loss_epoch=1.31e+7]Epoch 44/3000:   1%|▏         | 43/3000 [00:20<22:47,  2.16it/s, v_num=1, 
train_loss_step=1.32e+7, train_loss_epoch=1.31e+7]Epoch 44/3000:   1%|▏         | 44/3000 [00:20<22:50,  2.16it/s, v_num=1, train_loss_step=1.32e+7, train_loss_epoch=1.31e+7]Epoch 44/3000:   1%|▏         | 44/3000 [00:20<22:50,  2.16it/s, v_num=1, train_loss_step=1.44e+7, train_loss_epoch=1.3e+7] Epoch 45/3000:   1%|▏         | 44/3000 [00:20<22:50,  2.16it/s, v_num=1, train_loss_step=1.44e+7, train_loss_epoch=1.3e+7]Epoch 45/3000:   2%|▏         | 45/3000 [00:21<23:31,  2.09it/s, v_num=1, train_loss_step=1.44e+7, train_loss_epoch=1.3e+7]Epoch 45/3000:   2%|▏         | 45/3000 [00:21<23:31,  2.09it/s, v_num=1, train_loss_step=1.25e+7, train_loss_epoch=1.29e+7]Epoch 46/3000:   2%|▏         | 45/3000 [00:21<23:31,  2.09it/s, v_num=1, train_loss_step=1.25e+7, train_loss_epoch=1.29e+7]Epoch 46/3000:   2%|▏         | 46/3000 [00:21<23:44,  2.07it/s, v_num=1, train_loss_step=1.25e+7, train_loss_epoch=1.29e+7]Epoch 46/3000:   2%|▏         | 46/3000 [00:21<23:44,  2.07it/s, v_num=1, train_loss_step=1.34e+7, train_loss_epoch=1.29e+7]Epoch 47/3000:   2%|▏         | 46/3000 [00:21<23:44,  2.07it/s, v_num=1, train_loss_step=1.34e+7, train_loss_epoch=1.29e+7]Epoch 47/3000:   2%|▏         | 47/3000 [00:21<23:29,  2.10it/s, v_num=1, train_loss_step=1.34e+7, train_loss_epoch=1.29e+7]Epoch 47/3000:   2%|▏         | 47/3000 [00:21<23:29,  2.10it/s, v_num=1, train_loss_step=1.37e+7, train_loss_epoch=1.28e+7]Epoch 48/3000:   2%|▏         | 47/3000 [00:21<23:29,  2.10it/s, v_num=1, train_loss_step=1.37e+7, train_loss_epoch=1.28e+7]Epoch 48/3000:   2%|▏         | 48/3000 [00:22<22:59,  2.14it/s, v_num=1, train_loss_step=1.37e+7, train_loss_epoch=1.28e+7]Epoch 48/3000:   2%|▏         | 48/3000 [00:22<22:59,  2.14it/s, v_num=1, train_loss_step=1.15e+7, train_loss_epoch=1.27e+7]Epoch 49/3000:   2%|▏         | 48/3000 [00:22<22:59,  2.14it/s, v_num=1, train_loss_step=1.15e+7, train_loss_epoch=1.27e+7]Epoch 49/3000:   2%|▏         | 49/3000 [00:22<23:04,  2.13it/s, v_num=1, 
train_loss_step=1.15e+7, train_loss_epoch=1.27e+7]Epoch 49/3000:   2%|▏         | 49/3000 [00:22<23:04,  2.13it/s, v_num=1, train_loss_step=1.25e+7, train_loss_epoch=1.26e+7]Epoch 50/3000:   2%|▏         | 49/3000 [00:22<23:04,  2.13it/s, v_num=1, train_loss_step=1.25e+7, train_loss_epoch=1.26e+7]Epoch 50/3000:   2%|▏         | 50/3000 [00:23<23:01,  2.14it/s, v_num=1, train_loss_step=1.25e+7, train_loss_epoch=1.26e+7]Epoch 50/3000:   2%|▏         | 50/3000 [00:23<23:01,  2.14it/s, v_num=1, train_loss_step=1.21e+7, train_loss_epoch=1.25e+7]Epoch 51/3000:   2%|▏         | 50/3000 [00:23<23:01,  2.14it/s, v_num=1, train_loss_step=1.21e+7, train_loss_epoch=1.25e+7]Epoch 51/3000:   2%|▏         | 51/3000 [00:23<24:08,  2.04it/s, v_num=1, train_loss_step=1.21e+7, train_loss_epoch=1.25e+7]Epoch 51/3000:   2%|▏         | 51/3000 [00:23<24:08,  2.04it/s, v_num=1, train_loss_step=1.26e+7, train_loss_epoch=1.25e+7]Epoch 52/3000:   2%|▏         | 51/3000 [00:23<24:08,  2.04it/s, v_num=1, train_loss_step=1.26e+7, train_loss_epoch=1.25e+7]Epoch 52/3000:   2%|▏         | 52/3000 [00:24<23:52,  2.06it/s, v_num=1, train_loss_step=1.26e+7, train_loss_epoch=1.25e+7]Epoch 52/3000:   2%|▏         | 52/3000 [00:24<23:52,  2.06it/s, v_num=1, train_loss_step=1.12e+7, train_loss_epoch=1.24e+7]Epoch 53/3000:   2%|▏         | 52/3000 [00:24<23:52,  2.06it/s, v_num=1, train_loss_step=1.12e+7, train_loss_epoch=1.24e+7]Epoch 53/3000:   2%|▏         | 53/3000 [00:24<24:03,  2.04it/s, v_num=1, train_loss_step=1.12e+7, train_loss_epoch=1.24e+7]Epoch 53/3000:   2%|▏         | 53/3000 [00:24<24:03,  2.04it/s, v_num=1, train_loss_step=1.23e+7, train_loss_epoch=1.23e+7]Epoch 54/3000:   2%|▏         | 53/3000 [00:24<24:03,  2.04it/s, v_num=1, train_loss_step=1.23e+7, train_loss_epoch=1.23e+7]Epoch 54/3000:   2%|▏         | 54/3000 [00:25<23:35,  2.08it/s, v_num=1, train_loss_step=1.23e+7, train_loss_epoch=1.23e+7]Epoch 54/3000:   2%|▏         | 54/3000 [00:25<23:35,  2.08it/s, v_num=1, 
train_loss_step=1.37e+7, train_loss_epoch=1.22e+7]Epoch 55/3000:   2%|▏         | 54/3000 [00:25<23:35,  2.08it/s, v_num=1, train_loss_step=1.37e+7, train_loss_epoch=1.22e+7]Epoch 55/3000:   2%|▏         | 55/3000 [00:25<23:07,  2.12it/s, v_num=1, train_loss_step=1.37e+7, train_loss_epoch=1.22e+7]Epoch 55/3000:   2%|▏         | 55/3000 [00:25<23:07,  2.12it/s, v_num=1, train_loss_step=1.24e+7, train_loss_epoch=1.22e+7]Epoch 56/3000:   2%|▏         | 55/3000 [00:25<23:07,  2.12it/s, v_num=1, train_loss_step=1.24e+7, train_loss_epoch=1.22e+7]Epoch 56/3000:   2%|▏         | 56/3000 [00:26<22:42,  2.16it/s, v_num=1, train_loss_step=1.24e+7, train_loss_epoch=1.22e+7]Epoch 56/3000:   2%|▏         | 56/3000 [00:26<22:42,  2.16it/s, v_num=1, train_loss_step=1.19e+7, train_loss_epoch=1.21e+7]Epoch 57/3000:   2%|▏         | 56/3000 [00:26<22:42,  2.16it/s, v_num=1, train_loss_step=1.19e+7, train_loss_epoch=1.21e+7]Epoch 57/3000:   2%|▏         | 57/3000 [00:26<21:27,  2.29it/s, v_num=1, train_loss_step=1.19e+7, train_loss_epoch=1.21e+7]Epoch 57/3000:   2%|▏         | 57/3000 [00:26<21:27,  2.29it/s, v_num=1, train_loss_step=1.32e+7, train_loss_epoch=1.2e+7] Epoch 58/3000:   2%|▏         | 57/3000 [00:26<21:27,  2.29it/s, v_num=1, train_loss_step=1.32e+7, train_loss_epoch=1.2e+7]Epoch 58/3000:   2%|▏         | 58/3000 [00:26<20:04,  2.44it/s, v_num=1, train_loss_step=1.32e+7, train_loss_epoch=1.2e+7]Epoch 58/3000:   2%|▏         | 58/3000 [00:26<20:04,  2.44it/s, v_num=1, train_loss_step=1.17e+7, train_loss_epoch=1.19e+7]Epoch 59/3000:   2%|▏         | 58/3000 [00:26<20:04,  2.44it/s, v_num=1, train_loss_step=1.17e+7, train_loss_epoch=1.19e+7]Epoch 59/3000:   2%|▏         | 59/3000 [00:27<19:09,  2.56it/s, v_num=1, train_loss_step=1.17e+7, train_loss_epoch=1.19e+7]Epoch 59/3000:   2%|▏         | 59/3000 [00:27<19:09,  2.56it/s, v_num=1, train_loss_step=1.12e+7, train_loss_epoch=1.19e+7]Epoch 60/3000:   2%|▏         | 59/3000 [00:27<19:09,  2.56it/s, v_num=1, 
train_loss_step=1.12e+7, train_loss_epoch=1.19e+7]Epoch 60/3000:   2%|▏         | 60/3000 [00:27<20:03,  2.44it/s, v_num=1, train_loss_step=1.12e+7, train_loss_epoch=1.19e+7]Epoch 60/3000:   2%|▏         | 60/3000 [00:27<20:03,  2.44it/s, v_num=1, train_loss_step=1.17e+7, train_loss_epoch=1.18e+7]Epoch 61/3000:   2%|▏         | 60/3000 [00:27<20:03,  2.44it/s, v_num=1, train_loss_step=1.17e+7, train_loss_epoch=1.18e+7]Epoch 61/3000:   2%|▏         | 61/3000 [00:28<20:18,  2.41it/s, v_num=1, train_loss_step=1.17e+7, train_loss_epoch=1.18e+7]Epoch 61/3000:   2%|▏         | 61/3000 [00:28<20:18,  2.41it/s, v_num=1, train_loss_step=1.14e+7, train_loss_epoch=1.17e+7]Epoch 62/3000:   2%|▏         | 61/3000 [00:28<20:18,  2.41it/s, v_num=1, train_loss_step=1.14e+7, train_loss_epoch=1.17e+7]Epoch 62/3000:   2%|▏         | 62/3000 [00:28<21:23,  2.29it/s, v_num=1, train_loss_step=1.14e+7, train_loss_epoch=1.17e+7]Epoch 62/3000:   2%|▏         | 62/3000 [00:28<21:23,  2.29it/s, v_num=1, train_loss_step=1.17e+7, train_loss_epoch=1.16e+7]Epoch 63/3000:   2%|▏         | 62/3000 [00:28<21:23,  2.29it/s, v_num=1, train_loss_step=1.17e+7, train_loss_epoch=1.16e+7]Epoch 63/3000:   2%|▏         | 63/3000 [00:29<22:00,  2.22it/s, v_num=1, train_loss_step=1.17e+7, train_loss_epoch=1.16e+7]Epoch 63/3000:   2%|▏         | 63/3000 [00:29<22:00,  2.22it/s, v_num=1, train_loss_step=1.16e+7, train_loss_epoch=1.16e+7]Epoch 64/3000:   2%|▏         | 63/3000 [00:29<22:00,  2.22it/s, v_num=1, train_loss_step=1.16e+7, train_loss_epoch=1.16e+7]Epoch 64/3000:   2%|▏         | 64/3000 [00:29<21:47,  2.25it/s, v_num=1, train_loss_step=1.16e+7, train_loss_epoch=1.16e+7]Epoch 64/3000:   2%|▏         | 64/3000 [00:29<21:47,  2.25it/s, v_num=1, train_loss_step=1.18e+7, train_loss_epoch=1.15e+7]Epoch 65/3000:   2%|▏         | 64/3000 [00:29<21:47,  2.25it/s, v_num=1, train_loss_step=1.18e+7, train_loss_epoch=1.15e+7]Epoch 65/3000:   2%|▏         | 65/3000 [00:30<21:52,  2.24it/s, v_num=1, 
train_loss_step=1.18e+7, train_loss_epoch=1.15e+7]Epoch 65/3000:   2%|▏         | 65/3000 [00:30<21:52,  2.24it/s, v_num=1, train_loss_step=1.26e+7, train_loss_epoch=1.14e+7]Epoch 66/3000:   2%|▏         | 65/3000 [00:30<21:52,  2.24it/s, v_num=1, train_loss_step=1.26e+7, train_loss_epoch=1.14e+7]Epoch 66/3000:   2%|▏         | 66/3000 [00:30<21:36,  2.26it/s, v_num=1, train_loss_step=1.26e+7, train_loss_epoch=1.14e+7]Epoch 66/3000:   2%|▏         | 66/3000 [00:30<21:36,  2.26it/s, v_num=1, train_loss_step=1.16e+7, train_loss_epoch=1.14e+7]Epoch 67/3000:   2%|▏         | 66/3000 [00:30<21:36,  2.26it/s, v_num=1, train_loss_step=1.16e+7, train_loss_epoch=1.14e+7]Epoch 67/3000:   2%|▏         | 67/3000 [00:30<21:43,  2.25it/s, v_num=1, train_loss_step=1.16e+7, train_loss_epoch=1.14e+7]Epoch 67/3000:   2%|▏         | 67/3000 [00:30<21:43,  2.25it/s, v_num=1, train_loss_step=1.23e+7, train_loss_epoch=1.13e+7]Epoch 68/3000:   2%|▏         | 67/3000 [00:30<21:43,  2.25it/s, v_num=1, train_loss_step=1.23e+7, train_loss_epoch=1.13e+7]Epoch 68/3000:   2%|▏         | 68/3000 [00:31<21:39,  2.26it/s, v_num=1, train_loss_step=1.23e+7, train_loss_epoch=1.13e+7]Epoch 68/3000:   2%|▏         | 68/3000 [00:31<21:39,  2.26it/s, v_num=1, train_loss_step=1.15e+7, train_loss_epoch=1.12e+7]Epoch 69/3000:   2%|▏         | 68/3000 [00:31<21:39,  2.26it/s, v_num=1, train_loss_step=1.15e+7, train_loss_epoch=1.12e+7]Epoch 69/3000:   2%|▏         | 69/3000 [00:31<22:55,  2.13it/s, v_num=1, train_loss_step=1.15e+7, train_loss_epoch=1.12e+7]Epoch 69/3000:   2%|▏         | 69/3000 [00:31<22:55,  2.13it/s, v_num=1, train_loss_step=1.07e+7, train_loss_epoch=1.12e+7]Epoch 70/3000:   2%|▏         | 69/3000 [00:31<22:55,  2.13it/s, v_num=1, train_loss_step=1.07e+7, train_loss_epoch=1.12e+7]Epoch 70/3000:   2%|▏         | 70/3000 [00:32<22:28,  2.17it/s, v_num=1, train_loss_step=1.07e+7, train_loss_epoch=1.12e+7]Epoch 70/3000:   2%|▏         | 70/3000 [00:32<22:28,  2.17it/s, v_num=1, 
train_loss_step=1.03e+7, train_loss_epoch=1.11e+7]Epoch 71/3000:   2%|▏         | 70/3000 [00:32<22:28,  2.17it/s, v_num=1, train_loss_step=1.03e+7, train_loss_epoch=1.11e+7]Epoch 71/3000:   2%|▏         | 71/3000 [00:32<21:35,  2.26it/s, v_num=1, train_loss_step=1.03e+7, train_loss_epoch=1.11e+7]Epoch 71/3000:   2%|▏         | 71/3000 [00:32<21:35,  2.26it/s, v_num=1, train_loss_step=1.1e+7, train_loss_epoch=1.1e+7]  Epoch 72/3000:   2%|▏         | 71/3000 [00:32<21:35,  2.26it/s, v_num=1, train_loss_step=1.1e+7, train_loss_epoch=1.1e+7]Epoch 72/3000:   2%|▏         | 72/3000 [00:33<22:36,  2.16it/s, v_num=1, train_loss_step=1.1e+7, train_loss_epoch=1.1e+7]Epoch 72/3000:   2%|▏         | 72/3000 [00:33<22:36,  2.16it/s, v_num=1, train_loss_step=1.12e+7, train_loss_epoch=1.1e+7]Epoch 73/3000:   2%|▏         | 72/3000 [00:33<22:36,  2.16it/s, v_num=1, train_loss_step=1.12e+7, train_loss_epoch=1.1e+7]Epoch 73/3000:   2%|▏         | 73/3000 [00:33<22:06,  2.21it/s, v_num=1, train_loss_step=1.12e+7, train_loss_epoch=1.1e+7]Epoch 73/3000:   2%|▏         | 73/3000 [00:33<22:06,  2.21it/s, v_num=1, train_loss_step=1.08e+7, train_loss_epoch=1.09e+7]Epoch 74/3000:   2%|▏         | 73/3000 [00:33<22:06,  2.21it/s, v_num=1, train_loss_step=1.08e+7, train_loss_epoch=1.09e+7]Epoch 74/3000:   2%|▏         | 74/3000 [00:34<22:13,  2.19it/s, v_num=1, train_loss_step=1.08e+7, train_loss_epoch=1.09e+7]Epoch 74/3000:   2%|▏         | 74/3000 [00:34<22:13,  2.19it/s, v_num=1, train_loss_step=9.71e+6, train_loss_epoch=1.08e+7]Epoch 75/3000:   2%|▏         | 74/3000 [00:34<22:13,  2.19it/s, v_num=1, train_loss_step=9.71e+6, train_loss_epoch=1.08e+7]Epoch 75/3000:   2%|▎         | 75/3000 [00:34<22:16,  2.19it/s, v_num=1, train_loss_step=9.71e+6, train_loss_epoch=1.08e+7]Epoch 75/3000:   2%|▎         | 75/3000 [00:34<22:16,  2.19it/s, v_num=1, train_loss_step=9.99e+6, train_loss_epoch=1.08e+7]Epoch 76/3000:   2%|▎         | 75/3000 [00:34<22:16,  2.19it/s, v_num=1, 
train_loss_step=9.99e+6, train_loss_epoch=1.08e+7]Epoch 76/3000:   3%|▎         | 76/3000 [00:34<21:33,  2.26it/s, v_num=1, train_loss_step=9.99e+6, train_loss_epoch=1.08e+7]Epoch 76/3000:   3%|▎         | 76/3000 [00:35<21:33,  2.26it/s, v_num=1, train_loss_step=1.08e+7, train_loss_epoch=1.07e+7]Epoch 77/3000:   3%|▎         | 76/3000 [00:35<21:33,  2.26it/s, v_num=1, train_loss_step=1.08e+7, train_loss_epoch=1.07e+7]Epoch 77/3000:   3%|▎         | 77/3000 [00:35<21:30,  2.26it/s, v_num=1, train_loss_step=1.08e+7, train_loss_epoch=1.07e+7]Epoch 77/3000:   3%|▎         | 77/3000 [00:35<21:30,  2.26it/s, v_num=1, train_loss_step=1.04e+7, train_loss_epoch=1.06e+7]Epoch 78/3000:   3%|▎         | 77/3000 [00:35<21:30,  2.26it/s, v_num=1, train_loss_step=1.04e+7, train_loss_epoch=1.06e+7]Epoch 78/3000:   3%|▎         | 78/3000 [00:35<21:13,  2.29it/s, v_num=1, train_loss_step=1.04e+7, train_loss_epoch=1.06e+7]Epoch 78/3000:   3%|▎         | 78/3000 [00:35<21:13,  2.29it/s, v_num=1, train_loss_step=9.7e+6, train_loss_epoch=1.06e+7] Epoch 79/3000:   3%|▎         | 78/3000 [00:35<21:13,  2.29it/s, v_num=1, train_loss_step=9.7e+6, train_loss_epoch=1.06e+7]Epoch 79/3000:   3%|▎         | 79/3000 [00:36<21:40,  2.25it/s, v_num=1, train_loss_step=9.7e+6, train_loss_epoch=1.06e+7]Epoch 79/3000:   3%|▎         | 79/3000 [00:36<21:40,  2.25it/s, v_num=1, train_loss_step=1.07e+7, train_loss_epoch=1.05e+7]Epoch 80/3000:   3%|▎         | 79/3000 [00:36<21:40,  2.25it/s, v_num=1, train_loss_step=1.07e+7, train_loss_epoch=1.05e+7]Epoch 80/3000:   3%|▎         | 80/3000 [00:36<20:42,  2.35it/s, v_num=1, train_loss_step=1.07e+7, train_loss_epoch=1.05e+7]Epoch 80/3000:   3%|▎         | 80/3000 [00:36<20:42,  2.35it/s, v_num=1, train_loss_step=1.07e+7, train_loss_epoch=1.05e+7]Epoch 81/3000:   3%|▎         | 80/3000 [00:36<20:42,  2.35it/s, v_num=1, train_loss_step=1.07e+7, train_loss_epoch=1.05e+7]Epoch 81/3000:   3%|▎         | 81/3000 [00:37<20:34,  2.36it/s, v_num=1, 
train_loss_step=1.07e+7, train_loss_epoch=1.05e+7]Epoch 81/3000:   3%|▎         | 81/3000 [00:37<20:34,  2.36it/s, v_num=1, train_loss_step=1.05e+7, train_loss_epoch=1.04e+7]Epoch 82/3000:   3%|▎         | 81/3000 [00:37<20:34,  2.36it/s, v_num=1, train_loss_step=1.05e+7, train_loss_epoch=1.04e+7]Epoch 82/3000:   3%|▎         | 82/3000 [00:37<20:27,  2.38it/s, v_num=1, train_loss_step=1.05e+7, train_loss_epoch=1.04e+7]Epoch 82/3000:   3%|▎         | 82/3000 [00:37<20:27,  2.38it/s, v_num=1, train_loss_step=1.02e+7, train_loss_epoch=1.03e+7]Epoch 83/3000:   3%|▎         | 82/3000 [00:37<20:27,  2.38it/s, v_num=1, train_loss_step=1.02e+7, train_loss_epoch=1.03e+7]Epoch 83/3000:   3%|▎         | 83/3000 [00:37<20:14,  2.40it/s, v_num=1, train_loss_step=1.02e+7, train_loss_epoch=1.03e+7]Epoch 83/3000:   3%|▎         | 83/3000 [00:37<20:14,  2.40it/s, v_num=1, train_loss_step=9.6e+6, train_loss_epoch=1.03e+7] Epoch 84/3000:   3%|▎         | 83/3000 [00:37<20:14,  2.40it/s, v_num=1, train_loss_step=9.6e+6, train_loss_epoch=1.03e+7]Epoch 84/3000:   3%|▎         | 84/3000 [00:38<20:36,  2.36it/s, v_num=1, train_loss_step=9.6e+6, train_loss_epoch=1.03e+7]Epoch 84/3000:   3%|▎         | 84/3000 [00:38<20:36,  2.36it/s, v_num=1, train_loss_step=1.11e+7, train_loss_epoch=1.02e+7]Epoch 85/3000:   3%|▎         | 84/3000 [00:38<20:36,  2.36it/s, v_num=1, train_loss_step=1.11e+7, train_loss_epoch=1.02e+7]Epoch 85/3000:   3%|▎         | 85/3000 [00:38<20:39,  2.35it/s, v_num=1, train_loss_step=1.11e+7, train_loss_epoch=1.02e+7]Epoch 85/3000:   3%|▎         | 85/3000 [00:38<20:39,  2.35it/s, v_num=1, train_loss_step=1e+7, train_loss_epoch=1.02e+7]   Epoch 86/3000:   3%|▎         | 85/3000 [00:38<20:39,  2.35it/s, v_num=1, train_loss_step=1e+7, train_loss_epoch=1.02e+7]Epoch 86/3000:   3%|▎         | 86/3000 [00:39<21:18,  2.28it/s, v_num=1, train_loss_step=1e+7, train_loss_epoch=1.02e+7]Epoch 86/3000:   3%|▎         | 86/3000 [00:39<21:18,  2.28it/s, v_num=1, 
train_loss_step=9.28e+6, train_loss_epoch=1.01e+7]Epoch 87/3000:   3%|▎         | 86/3000 [00:39<21:18,  2.28it/s, v_num=1, train_loss_step=9.28e+6, train_loss_epoch=1.01e+7]Epoch 87/3000:   3%|▎         | 87/3000 [00:39<21:17,  2.28it/s, v_num=1, train_loss_step=9.28e+6, train_loss_epoch=1.01e+7]Epoch 87/3000:   3%|▎         | 87/3000 [00:39<21:17,  2.28it/s, v_num=1, train_loss_step=1.05e+7, train_loss_epoch=1.01e+7]Epoch 88/3000:   3%|▎         | 87/3000 [00:39<21:17,  2.28it/s, v_num=1, train_loss_step=1.05e+7, train_loss_epoch=1.01e+7]Epoch 88/3000:   3%|▎         | 88/3000 [00:40<22:03,  2.20it/s, v_num=1, train_loss_step=1.05e+7, train_loss_epoch=1.01e+7]Epoch 88/3000:   3%|▎         | 88/3000 [00:40<22:03,  2.20it/s, v_num=1, train_loss_step=1.01e+7, train_loss_epoch=9.99e+6]Epoch 89/3000:   3%|▎         | 88/3000 [00:40<22:03,  2.20it/s, v_num=1, train_loss_step=1.01e+7, train_loss_epoch=9.99e+6]Epoch 89/3000:   3%|▎         | 89/3000 [00:40<21:10,  2.29it/s, v_num=1, train_loss_step=1.01e+7, train_loss_epoch=9.99e+6]Epoch 89/3000:   3%|▎         | 89/3000 [00:40<21:10,  2.29it/s, v_num=1, train_loss_step=9.18e+6, train_loss_epoch=9.94e+6]Epoch 90/3000:   3%|▎         | 89/3000 [00:40<21:10,  2.29it/s, v_num=1, train_loss_step=9.18e+6, train_loss_epoch=9.94e+6]Epoch 90/3000:   3%|▎         | 90/3000 [00:41<21:16,  2.28it/s, v_num=1, train_loss_step=9.18e+6, train_loss_epoch=9.94e+6]Epoch 90/3000:   3%|▎         | 90/3000 [00:41<21:16,  2.28it/s, v_num=1, train_loss_step=1.05e+7, train_loss_epoch=9.88e+6]Epoch 91/3000:   3%|▎         | 90/3000 [00:41<21:16,  2.28it/s, v_num=1, train_loss_step=1.05e+7, train_loss_epoch=9.88e+6]Epoch 91/3000:   3%|▎         | 91/3000 [00:41<21:37,  2.24it/s, v_num=1, train_loss_step=1.05e+7, train_loss_epoch=9.88e+6]Epoch 91/3000:   3%|▎         | 91/3000 [00:41<21:37,  2.24it/s, v_num=1, train_loss_step=9.3e+6, train_loss_epoch=9.83e+6] Epoch 92/3000:   3%|▎         | 91/3000 [00:41<21:37,  2.24it/s, v_num=1, 
train_loss_step=9.3e+6, train_loss_epoch=9.83e+6]Epoch 92/3000:   3%|▎         | 92/3000 [00:41<21:57,  2.21it/s, v_num=1, train_loss_step=9.3e+6, train_loss_epoch=9.83e+6]Epoch 92/3000:   3%|▎         | 92/3000 [00:41<21:57,  2.21it/s, v_num=1, train_loss_step=9.82e+6, train_loss_epoch=9.77e+6]Epoch 93/3000:   3%|▎         | 92/3000 [00:42<21:57,  2.21it/s, v_num=1, train_loss_step=9.82e+6, train_loss_epoch=9.77e+6]Epoch 93/3000:   3%|▎         | 93/3000 [00:42<23:08,  2.09it/s, v_num=1, train_loss_step=9.82e+6, train_loss_epoch=9.77e+6]Epoch 93/3000:   3%|▎         | 93/3000 [00:42<23:08,  2.09it/s, v_num=1, train_loss_step=9.25e+6, train_loss_epoch=9.72e+6]Epoch 94/3000:   3%|▎         | 93/3000 [00:42<23:08,  2.09it/s, v_num=1, train_loss_step=9.25e+6, train_loss_epoch=9.72e+6]Epoch 94/3000:   3%|▎         | 94/3000 [00:42<22:22,  2.16it/s, v_num=1, train_loss_step=9.25e+6, train_loss_epoch=9.72e+6]Epoch 94/3000:   3%|▎         | 94/3000 [00:42<22:22,  2.16it/s, v_num=1, train_loss_step=9.71e+6, train_loss_epoch=9.67e+6]Epoch 95/3000:   3%|▎         | 94/3000 [00:42<22:22,  2.16it/s, v_num=1, train_loss_step=9.71e+6, train_loss_epoch=9.67e+6]Epoch 95/3000:   3%|▎         | 95/3000 [00:43<22:35,  2.14it/s, v_num=1, train_loss_step=9.71e+6, train_loss_epoch=9.67e+6]Epoch 95/3000:   3%|▎         | 95/3000 [00:43<22:35,  2.14it/s, v_num=1, train_loss_step=9.24e+6, train_loss_epoch=9.61e+6]Epoch 96/3000:   3%|▎         | 95/3000 [00:43<22:35,  2.14it/s, v_num=1, train_loss_step=9.24e+6, train_loss_epoch=9.61e+6]Epoch 96/3000:   3%|▎         | 96/3000 [00:43<21:39,  2.23it/s, v_num=1, train_loss_step=9.24e+6, train_loss_epoch=9.61e+6]Epoch 96/3000:   3%|▎         | 96/3000 [00:43<21:39,  2.23it/s, v_num=1, train_loss_step=9.92e+6, train_loss_epoch=9.56e+6]Epoch 97/3000:   3%|▎         | 96/3000 [00:43<21:39,  2.23it/s, v_num=1, train_loss_step=9.92e+6, train_loss_epoch=9.56e+6]Epoch 97/3000:   3%|▎         | 97/3000 [00:44<21:45,  2.22it/s, v_num=1, 
train_loss_step=9.92e+6, train_loss_epoch=9.56e+6]Epoch 97/3000:   3%|▎         | 97/3000 [00:44<21:45,  2.22it/s, v_num=1, train_loss_step=9.54e+6, train_loss_epoch=9.51e+6]Epoch 98/3000:   3%|▎         | 97/3000 [00:44<21:45,  2.22it/s, v_num=1, train_loss_step=9.54e+6, train_loss_epoch=9.51e+6]Epoch 98/3000:   3%|▎         | 98/3000 [00:44<22:25,  2.16it/s, v_num=1, train_loss_step=9.54e+6, train_loss_epoch=9.51e+6]Epoch 98/3000:   3%|▎         | 98/3000 [00:44<22:25,  2.16it/s, v_num=1, train_loss_step=1e+7, train_loss_epoch=9.46e+6]   Epoch 99/3000:   3%|▎         | 98/3000 [00:44<22:25,  2.16it/s, v_num=1, train_loss_step=1e+7, train_loss_epoch=9.46e+6]Epoch 99/3000:   3%|▎         | 99/3000 [00:45<22:00,  2.20it/s, v_num=1, train_loss_step=1e+7, train_loss_epoch=9.46e+6]Epoch 99/3000:   3%|▎         | 99/3000 [00:45<22:00,  2.20it/s, v_num=1, train_loss_step=9.23e+6, train_loss_epoch=9.41e+6]Epoch 100/3000:   3%|▎         | 99/3000 [00:45<22:00,  2.20it/s, v_num=1, train_loss_step=9.23e+6, train_loss_epoch=9.41e+6]Epoch 100/3000:   3%|▎         | 100/3000 [00:45<22:48,  2.12it/s, v_num=1, train_loss_step=9.23e+6, train_loss_epoch=9.41e+6]Epoch 100/3000:   3%|▎         | 100/3000 [00:45<22:48,  2.12it/s, v_num=1, train_loss_step=8.36e+6, train_loss_epoch=9.35e+6]Epoch 101/3000:   3%|▎         | 100/3000 [00:45<22:48,  2.12it/s, v_num=1, train_loss_step=8.36e+6, train_loss_epoch=9.35e+6]Epoch 101/3000:   3%|▎         | 101/3000 [00:46<22:38,  2.13it/s, v_num=1, train_loss_step=8.36e+6, train_loss_epoch=9.35e+6]Epoch 101/3000:   3%|▎         | 101/3000 [00:46<22:38,  2.13it/s, v_num=1, train_loss_step=9.67e+6, train_loss_epoch=9.3e+6] Epoch 102/3000:   3%|▎         | 101/3000 [00:46<22:38,  2.13it/s, v_num=1, train_loss_step=9.67e+6, train_loss_epoch=9.3e+6]Epoch 102/3000:   3%|▎         | 102/3000 [00:46<22:55,  2.11it/s, v_num=1, train_loss_step=9.67e+6, train_loss_epoch=9.3e+6]Epoch 102/3000:   3%|▎         | 102/3000 [00:46<22:55,  2.11it/s, v_num=1, 
train_loss_step=8.38e+6, train_loss_epoch=9.26e+6]Epoch 103/3000:   3%|▎         | 102/3000 [00:46<22:55,  2.11it/s, v_num=1, train_loss_step=8.38e+6, train_loss_epoch=9.26e+6]Epoch 103/3000:   3%|▎         | 103/3000 [00:47<22:31,  2.14it/s, v_num=1, train_loss_step=8.38e+6, train_loss_epoch=9.26e+6]Epoch 103/3000:   3%|▎         | 103/3000 [00:47<22:31,  2.14it/s, v_num=1, train_loss_step=9.32e+6, train_loss_epoch=9.21e+6]Epoch 104/3000:   3%|▎         | 103/3000 [00:47<22:31,  2.14it/s, v_num=1, train_loss_step=9.32e+6, train_loss_epoch=9.21e+6]Epoch 104/3000:   3%|▎         | 104/3000 [00:47<22:56,  2.10it/s, v_num=1, train_loss_step=9.32e+6, train_loss_epoch=9.21e+6]Epoch 104/3000:   3%|▎         | 104/3000 [00:47<22:56,  2.10it/s, v_num=1, train_loss_step=9.55e+6, train_loss_epoch=9.16e+6]Epoch 105/3000:   3%|▎         | 104/3000 [00:47<22:56,  2.10it/s, v_num=1, train_loss_step=9.55e+6, train_loss_epoch=9.16e+6]Epoch 105/3000:   4%|▎         | 105/3000 [00:48<23:04,  2.09it/s, v_num=1, train_loss_step=9.55e+6, train_loss_epoch=9.16e+6]Epoch 105/3000:   4%|▎         | 105/3000 [00:48<23:04,  2.09it/s, v_num=1, train_loss_step=9.56e+6, train_loss_epoch=9.11e+6]Epoch 106/3000:   4%|▎         | 105/3000 [00:48<23:04,  2.09it/s, v_num=1, train_loss_step=9.56e+6, train_loss_epoch=9.11e+6]Epoch 106/3000:   4%|▎         | 106/3000 [00:48<22:15,  2.17it/s, v_num=1, train_loss_step=9.56e+6, train_loss_epoch=9.11e+6]Epoch 106/3000:   4%|▎         | 106/3000 [00:48<22:15,  2.17it/s, v_num=1, train_loss_step=8.43e+6, train_loss_epoch=9.06e+6]Epoch 107/3000:   4%|▎         | 106/3000 [00:48<22:15,  2.17it/s, v_num=1, train_loss_step=8.43e+6, train_loss_epoch=9.06e+6]Epoch 107/3000:   4%|▎         | 107/3000 [00:48<21:54,  2.20it/s, v_num=1, train_loss_step=8.43e+6, train_loss_epoch=9.06e+6]Epoch 107/3000:   4%|▎         | 107/3000 [00:48<21:54,  2.20it/s, v_num=1, train_loss_step=8.51e+6, train_loss_epoch=9.01e+6]Epoch 108/3000:   4%|▎         | 107/3000 [00:48<21:54,  
2.20it/s, v_num=1, train_loss_step=8.51e+6, train_loss_epoch=9.01e+6]Epoch 108/3000:   4%|▎         | 108/3000 [00:49<21:54,  2.20it/s, v_num=1, train_loss_step=8.51e+6, train_loss_epoch=9.01e+6]Epoch 108/3000:   4%|▎         | 108/3000 [00:49<21:54,  2.20it/s, v_num=1, train_loss_step=9.22e+6, train_loss_epoch=8.97e+6]Epoch 109/3000:   4%|▎         | 108/3000 [00:49<21:54,  2.20it/s, v_num=1, train_loss_step=9.22e+6, train_loss_epoch=8.97e+6]Epoch 109/3000:   4%|▎         | 109/3000 [00:49<21:39,  2.22it/s, v_num=1, train_loss_step=9.22e+6, train_loss_epoch=8.97e+6]Epoch 109/3000:   4%|▎         | 109/3000 [00:49<21:39,  2.22it/s, v_num=1, train_loss_step=9.44e+6, train_loss_epoch=8.92e+6]Epoch 110/3000:   4%|▎         | 109/3000 [00:49<21:39,  2.22it/s, v_num=1, train_loss_step=9.44e+6, train_loss_epoch=8.92e+6]Epoch 110/3000:   4%|▎         | 110/3000 [00:50<21:39,  2.22it/s, v_num=1, train_loss_step=9.44e+6, train_loss_epoch=8.92e+6]Epoch 110/3000:   4%|▎         | 110/3000 [00:50<21:39,  2.22it/s, v_num=1, train_loss_step=9.28e+6, train_loss_epoch=8.87e+6]Epoch 111/3000:   4%|▎         | 110/3000 [00:50<21:39,  2.22it/s, v_num=1, train_loss_step=9.28e+6, train_loss_epoch=8.87e+6]Epoch 111/3000:   4%|▎         | 111/3000 [00:50<21:21,  2.25it/s, v_num=1, train_loss_step=9.28e+6, train_loss_epoch=8.87e+6]Epoch 111/3000:   4%|▎         | 111/3000 [00:50<21:21,  2.25it/s, v_num=1, train_loss_step=8.67e+6, train_loss_epoch=8.83e+6]Epoch 112/3000:   4%|▎         | 111/3000 [00:50<21:21,  2.25it/s, v_num=1, train_loss_step=8.67e+6, train_loss_epoch=8.83e+6]Epoch 112/3000:   4%|▎         | 112/3000 [00:51<21:45,  2.21it/s, v_num=1, train_loss_step=8.67e+6, train_loss_epoch=8.83e+6]Epoch 112/3000:   4%|▎         | 112/3000 [00:51<21:45,  2.21it/s, v_num=1, train_loss_step=9.09e+6, train_loss_epoch=8.78e+6]Epoch 113/3000:   4%|▎         | 112/3000 [00:51<21:45,  2.21it/s, v_num=1, train_loss_step=9.09e+6, train_loss_epoch=8.78e+6]Epoch 113/3000:   4%|▍         | 
113/3000 [00:51<21:13,  2.27it/s, v_num=1, train_loss_step=9.09e+6, train_loss_epoch=8.78e+6]Epoch 113/3000:   4%|▍         | 113/3000 [00:51<21:13,  2.27it/s, v_num=1, train_loss_step=8.55e+6, train_loss_epoch=8.74e+6]Epoch 114/3000:   4%|▍         | 113/3000 [00:51<21:13,  2.27it/s, v_num=1, train_loss_step=8.55e+6, train_loss_epoch=8.74e+6]Epoch 114/3000:   4%|▍         | 114/3000 [00:52<20:59,  2.29it/s, v_num=1, train_loss_step=8.55e+6, train_loss_epoch=8.74e+6]Epoch 114/3000:   4%|▍         | 114/3000 [00:52<20:59,  2.29it/s, v_num=1, train_loss_step=8.61e+6, train_loss_epoch=8.69e+6]Epoch 115/3000:   4%|▍         | 114/3000 [00:52<20:59,  2.29it/s, v_num=1, train_loss_step=8.61e+6, train_loss_epoch=8.69e+6]Epoch 115/3000:   4%|▍         | 115/3000 [00:52<20:47,  2.31it/s, v_num=1, train_loss_step=8.61e+6, train_loss_epoch=8.69e+6]Epoch 115/3000:   4%|▍         | 115/3000 [00:52<20:47,  2.31it/s, v_num=1, train_loss_step=7.41e+6, train_loss_epoch=8.65e+6]Epoch 116/3000:   4%|▍         | 115/3000 [00:52<20:47,  2.31it/s, v_num=1, train_loss_step=7.41e+6, train_loss_epoch=8.65e+6]Epoch 116/3000:   4%|▍         | 116/3000 [00:52<19:57,  2.41it/s, v_num=1, train_loss_step=7.41e+6, train_loss_epoch=8.65e+6]Epoch 116/3000:   4%|▍         | 116/3000 [00:52<19:57,  2.41it/s, v_num=1, train_loss_step=9.06e+6, train_loss_epoch=8.6e+6] Epoch 117/3000:   4%|▍         | 116/3000 [00:52<19:57,  2.41it/s, v_num=1, train_loss_step=9.06e+6, train_loss_epoch=8.6e+6]Epoch 117/3000:   4%|▍         | 117/3000 [00:53<20:49,  2.31it/s, v_num=1, train_loss_step=9.06e+6, train_loss_epoch=8.6e+6]Epoch 117/3000:   4%|▍         | 117/3000 [00:53<20:49,  2.31it/s, v_num=1, train_loss_step=8.33e+6, train_loss_epoch=8.56e+6]Epoch 118/3000:   4%|▍         | 117/3000 [00:53<20:49,  2.31it/s, v_num=1, train_loss_step=8.33e+6, train_loss_epoch=8.56e+6]Epoch 118/3000:   4%|▍         | 118/3000 [00:53<20:43,  2.32it/s, v_num=1, train_loss_step=8.33e+6, train_loss_epoch=8.56e+6]Epoch 118/3000:   
4%|▍         | 118/3000 [00:53<20:43,  2.32it/s, v_num=1, train_loss_step=8.05e+6, train_loss_epoch=8.52e+6]Epoch 119/3000:   4%|▍         | 118/3000 [00:53<20:43,  2.32it/s, v_num=1, train_loss_step=8.05e+6, train_loss_epoch=8.52e+6]Epoch 119/3000:   4%|▍         | 119/3000 [00:54<20:56,  2.29it/s, v_num=1, train_loss_step=8.05e+6, train_loss_epoch=8.52e+6]Epoch 119/3000:   4%|▍         | 119/3000 [00:54<20:56,  2.29it/s, v_num=1, train_loss_step=8.69e+6, train_loss_epoch=8.47e+6]Epoch 120/3000:   4%|▍         | 119/3000 [00:54<20:56,  2.29it/s, v_num=1, train_loss_step=8.69e+6, train_loss_epoch=8.47e+6]Epoch 120/3000:   4%|▍         | 120/3000 [00:54<20:27,  2.35it/s, v_num=1, train_loss_step=8.69e+6, train_loss_epoch=8.47e+6]Epoch 120/3000:   4%|▍         | 120/3000 [00:54<20:27,  2.35it/s, v_num=1, train_loss_step=7.77e+6, train_loss_epoch=8.43e+6]Epoch 121/3000:   4%|▍         | 120/3000 [00:54<20:27,  2.35it/s, v_num=1, train_loss_step=7.77e+6, train_loss_epoch=8.43e+6]Epoch 121/3000:   4%|▍         | 121/3000 [00:54<19:12,  2.50it/s, v_num=1, train_loss_step=7.77e+6, train_loss_epoch=8.43e+6]Epoch 121/3000:   4%|▍         | 121/3000 [00:54<19:12,  2.50it/s, v_num=1, train_loss_step=7.97e+6, train_loss_epoch=8.39e+6]Epoch 122/3000:   4%|▍         | 121/3000 [00:54<19:12,  2.50it/s, v_num=1, train_loss_step=7.97e+6, train_loss_epoch=8.39e+6]Epoch 122/3000:   4%|▍         | 122/3000 [00:55<20:06,  2.39it/s, v_num=1, train_loss_step=7.97e+6, train_loss_epoch=8.39e+6]Epoch 122/3000:   4%|▍         | 122/3000 [00:55<20:06,  2.39it/s, v_num=1, train_loss_step=8.5e+6, train_loss_epoch=8.35e+6] Epoch 123/3000:   4%|▍         | 122/3000 [00:55<20:06,  2.39it/s, v_num=1, train_loss_step=8.5e+6, train_loss_epoch=8.35e+6]Epoch 123/3000:   4%|▍         | 123/3000 [00:55<20:05,  2.39it/s, v_num=1, train_loss_step=8.5e+6, train_loss_epoch=8.35e+6]Epoch 123/3000:   4%|▍         | 123/3000 [00:55<20:05,  2.39it/s, v_num=1, train_loss_step=8.18e+6, 
train_loss_epoch=8.31e+6]Epoch 124/3000:   4%|▍         | 123/3000 [00:55<20:05,  2.39it/s, v_num=1, train_loss_step=8.18e+6, train_loss_epoch=8.31e+6]Epoch 124/3000:   4%|▍         | 124/3000 [00:56<20:55,  2.29it/s, v_num=1, train_loss_step=8.18e+6, train_loss_epoch=8.31e+6]Epoch 124/3000:   4%|▍         | 124/3000 [00:56<20:55,  2.29it/s, v_num=1, train_loss_step=9.01e+6, train_loss_epoch=8.27e+6]Epoch 125/3000:   4%|▍         | 124/3000 [00:56<20:55,  2.29it/s, v_num=1, train_loss_step=9.01e+6, train_loss_epoch=8.27e+6]Epoch 125/3000:   4%|▍         | 125/3000 [00:56<21:28,  2.23it/s, v_num=1, train_loss_step=9.01e+6, train_loss_epoch=8.27e+6]Epoch 125/3000:   4%|▍         | 125/3000 [00:56<21:28,  2.23it/s, v_num=1, train_loss_step=9.19e+6, train_loss_epoch=8.22e+6]Epoch 126/3000:   4%|▍         | 125/3000 [00:56<21:28,  2.23it/s, v_num=1, train_loss_step=9.19e+6, train_loss_epoch=8.22e+6]Epoch 126/3000:   4%|▍         | 126/3000 [00:57<20:03,  2.39it/s, v_num=1, train_loss_step=9.19e+6, train_loss_epoch=8.22e+6]Epoch 126/3000:   4%|▍         | 126/3000 [00:57<20:03,  2.39it/s, v_num=1, train_loss_step=7.48e+6, train_loss_epoch=8.18e+6]Epoch 127/3000:   4%|▍         | 126/3000 [00:57<20:03,  2.39it/s, v_num=1, train_loss_step=7.48e+6, train_loss_epoch=8.18e+6]Epoch 127/3000:   4%|▍         | 127/3000 [00:57<19:53,  2.41it/s, v_num=1, train_loss_step=7.48e+6, train_loss_epoch=8.18e+6]Epoch 127/3000:   4%|▍         | 127/3000 [00:57<19:53,  2.41it/s, v_num=1, train_loss_step=7.28e+6, train_loss_epoch=8.14e+6]Epoch 128/3000:   4%|▍         | 127/3000 [00:57<19:53,  2.41it/s, v_num=1, train_loss_step=7.28e+6, train_loss_epoch=8.14e+6]Epoch 128/3000:   4%|▍         | 128/3000 [00:57<19:32,  2.45it/s, v_num=1, train_loss_step=7.28e+6, train_loss_epoch=8.14e+6]Epoch 128/3000:   4%|▍         | 128/3000 [00:57<19:32,  2.45it/s, v_num=1, train_loss_step=8.45e+6, train_loss_epoch=8.1e+6] Epoch 129/3000:   4%|▍         | 128/3000 [00:57<19:32,  2.45it/s, v_num=1, 
train_loss_step=8.45e+6, train_loss_epoch=8.1e+6]Epoch 129/3000:   4%|▍         | 129/3000 [00:58<19:58,  2.40it/s, v_num=1, train_loss_step=8.45e+6, train_loss_epoch=8.1e+6]Epoch 129/3000:   4%|▍         | 129/3000 [00:58<19:58,  2.40it/s, v_num=1, train_loss_step=7.91e+6, train_loss_epoch=8.07e+6]Epoch 130/3000:   4%|▍         | 129/3000 [00:58<19:58,  2.40it/s, v_num=1, train_loss_step=7.91e+6, train_loss_epoch=8.07e+6]Epoch 130/3000:   4%|▍         | 130/3000 [00:58<20:06,  2.38it/s, v_num=1, train_loss_step=7.91e+6, train_loss_epoch=8.07e+6]Epoch 130/3000:   4%|▍         | 130/3000 [00:58<20:06,  2.38it/s, v_num=1, train_loss_step=8.1e+6, train_loss_epoch=8.03e+6] Epoch 131/3000:   4%|▍         | 130/3000 [00:58<20:06,  2.38it/s, v_num=1, train_loss_step=8.1e+6, train_loss_epoch=8.03e+6]Epoch 131/3000:   4%|▍         | 131/3000 [00:59<20:37,  2.32it/s, v_num=1, train_loss_step=8.1e+6, train_loss_epoch=8.03e+6]Epoch 131/3000:   4%|▍         | 131/3000 [00:59<20:37,  2.32it/s, v_num=1, train_loss_step=8.17e+6, train_loss_epoch=7.99e+6]Epoch 132/3000:   4%|▍         | 131/3000 [00:59<20:37,  2.32it/s, v_num=1, train_loss_step=8.17e+6, train_loss_epoch=7.99e+6]Epoch 132/3000:   4%|▍         | 132/3000 [00:59<19:18,  2.47it/s, v_num=1, train_loss_step=8.17e+6, train_loss_epoch=7.99e+6]Epoch 132/3000:   4%|▍         | 132/3000 [00:59<19:18,  2.47it/s, v_num=1, train_loss_step=7.61e+6, train_loss_epoch=7.95e+6]Epoch 133/3000:   4%|▍         | 132/3000 [00:59<19:18,  2.47it/s, v_num=1, train_loss_step=7.61e+6, train_loss_epoch=7.95e+6]Epoch 133/3000:   4%|▍         | 133/3000 [00:59<18:28,  2.59it/s, v_num=1, train_loss_step=7.61e+6, train_loss_epoch=7.95e+6]Epoch 133/3000:   4%|▍         | 133/3000 [00:59<18:28,  2.59it/s, v_num=1, train_loss_step=8.09e+6, train_loss_epoch=7.91e+6]Epoch 134/3000:   4%|▍         | 133/3000 [00:59<18:28,  2.59it/s, v_num=1, train_loss_step=8.09e+6, train_loss_epoch=7.91e+6]Epoch 134/3000:   4%|▍         | 134/3000 [01:00<18:50,  
2.54it/s, v_num=1, train_loss_step=8.09e+6, train_loss_epoch=7.91e+6]Epoch 134/3000:   4%|▍         | 134/3000 [01:00<18:50,  2.54it/s, v_num=1, train_loss_step=8.66e+6, train_loss_epoch=7.87e+6]Epoch 135/3000:   4%|▍         | 134/3000 [01:00<18:50,  2.54it/s, v_num=1, train_loss_step=8.66e+6, train_loss_epoch=7.87e+6]Epoch 135/3000:   4%|▍         | 135/3000 [01:00<18:30,  2.58it/s, v_num=1, train_loss_step=8.66e+6, train_loss_epoch=7.87e+6]Epoch 135/3000:   4%|▍         | 135/3000 [01:00<18:30,  2.58it/s, v_num=1, train_loss_step=7.98e+6, train_loss_epoch=7.84e+6]Epoch 136/3000:   4%|▍         | 135/3000 [01:00<18:30,  2.58it/s, v_num=1, train_loss_step=7.98e+6, train_loss_epoch=7.84e+6]Epoch 136/3000:   5%|▍         | 136/3000 [01:01<17:58,  2.66it/s, v_num=1, train_loss_step=7.98e+6, train_loss_epoch=7.84e+6]Epoch 136/3000:   5%|▍         | 136/3000 [01:01<17:58,  2.66it/s, v_num=1, train_loss_step=7.12e+6, train_loss_epoch=7.8e+6] Epoch 137/3000:   5%|▍         | 136/3000 [01:01<17:58,  2.66it/s, v_num=1, train_loss_step=7.12e+6, train_loss_epoch=7.8e+6]Epoch 137/3000:   5%|▍         | 137/3000 [01:01<19:08,  2.49it/s, v_num=1, train_loss_step=7.12e+6, train_loss_epoch=7.8e+6]Epoch 137/3000:   5%|▍         | 137/3000 [01:01<19:08,  2.49it/s, v_num=1, train_loss_step=7.8e+6, train_loss_epoch=7.76e+6]Epoch 138/3000:   5%|▍         | 137/3000 [01:01<19:08,  2.49it/s, v_num=1, train_loss_step=7.8e+6, train_loss_epoch=7.76e+6]Epoch 138/3000:   5%|▍         | 138/3000 [01:02<20:34,  2.32it/s, v_num=1, train_loss_step=7.8e+6, train_loss_epoch=7.76e+6]Epoch 138/3000:   5%|▍         | 138/3000 [01:02<20:34,  2.32it/s, v_num=1, train_loss_step=7.32e+6, train_loss_epoch=7.73e+6]Epoch 139/3000:   5%|▍         | 138/3000 [01:02<20:34,  2.32it/s, v_num=1, train_loss_step=7.32e+6, train_loss_epoch=7.73e+6]Epoch 139/3000:   5%|▍         | 139/3000 [01:02<21:02,  2.27it/s, v_num=1, train_loss_step=7.32e+6, train_loss_epoch=7.73e+6]Epoch 139/3000:   5%|▍         | 139/3000 
[01:02<21:02,  2.27it/s, v_num=1, train_loss_step=8.09e+6, train_loss_epoch=7.69e+6]Epoch 140/3000:   5%|▍         | 139/3000 [01:02<21:02,  2.27it/s, v_num=1, train_loss_step=8.09e+6, train_loss_epoch=7.69e+6]Epoch 140/3000:   5%|▍         | 140/3000 [01:02<21:08,  2.26it/s, v_num=1, train_loss_step=8.09e+6, train_loss_epoch=7.69e+6]Epoch 140/3000:   5%|▍         | 140/3000 [01:02<21:08,  2.26it/s, v_num=1, train_loss_step=8.11e+6, train_loss_epoch=7.65e+6]Epoch 141/3000:   5%|▍         | 140/3000 [01:02<21:08,  2.26it/s, v_num=1, train_loss_step=8.11e+6, train_loss_epoch=7.65e+6]Epoch 141/3000:   5%|▍         | 141/3000 [01:03<21:19,  2.24it/s, v_num=1, train_loss_step=8.11e+6, train_loss_epoch=7.65e+6]Epoch 141/3000:   5%|▍         | 141/3000 [01:03<21:19,  2.24it/s, v_num=1, train_loss_step=7.9e+6, train_loss_epoch=7.62e+6] Epoch 142/3000:   5%|▍         | 141/3000 [01:03<21:19,  2.24it/s, v_num=1, train_loss_step=7.9e+6, train_loss_epoch=7.62e+6]Epoch 142/3000:   5%|▍         | 142/3000 [01:03<20:46,  2.29it/s, v_num=1, train_loss_step=7.9e+6, train_loss_epoch=7.62e+6]Epoch 142/3000:   5%|▍         | 142/3000 [01:03<20:46,  2.29it/s, v_num=1, train_loss_step=7.68e+6, train_loss_epoch=7.58e+6]Epoch 143/3000:   5%|▍         | 142/3000 [01:03<20:46,  2.29it/s, v_num=1, train_loss_step=7.68e+6, train_loss_epoch=7.58e+6]Epoch 143/3000:   5%|▍         | 143/3000 [01:04<20:50,  2.28it/s, v_num=1, train_loss_step=7.68e+6, train_loss_epoch=7.58e+6]Epoch 143/3000:   5%|▍         | 143/3000 [01:04<20:50,  2.28it/s, v_num=1, train_loss_step=6.91e+6, train_loss_epoch=7.55e+6]Epoch 144/3000:   5%|▍         | 143/3000 [01:04<20:50,  2.28it/s, v_num=1, train_loss_step=6.91e+6, train_loss_epoch=7.55e+6]Epoch 144/3000:   5%|▍         | 144/3000 [01:04<20:08,  2.36it/s, v_num=1, train_loss_step=6.91e+6, train_loss_epoch=7.55e+6]Epoch 144/3000:   5%|▍         | 144/3000 [01:04<20:08,  2.36it/s, v_num=1, train_loss_step=8.08e+6, train_loss_epoch=7.51e+6]Epoch 145/3000:   5%|▍      
   | 144/3000 [01:04<20:08,  2.36it/s, v_num=1, train_loss_step=8.08e+6, train_loss_epoch=7.51e+6]Epoch 145/3000:   5%|▍         | 145/3000 [01:04<18:55,  2.51it/s, v_num=1, train_loss_step=8.08e+6, train_loss_epoch=7.51e+6]Epoch 145/3000:   5%|▍         | 145/3000 [01:04<18:55,  2.51it/s, v_num=1, train_loss_step=7.66e+6, train_loss_epoch=7.48e+6]Epoch 146/3000:   5%|▍         | 145/3000 [01:04<18:55,  2.51it/s, v_num=1, train_loss_step=7.66e+6, train_loss_epoch=7.48e+6]Epoch 146/3000:   5%|▍         | 146/3000 [01:05<20:23,  2.33it/s, v_num=1, train_loss_step=7.66e+6, train_loss_epoch=7.48e+6]Epoch 146/3000:   5%|▍         | 146/3000 [01:05<20:23,  2.33it/s, v_num=1, train_loss_step=7.24e+6, train_loss_epoch=7.45e+6]Epoch 147/3000:   5%|▍         | 146/3000 [01:05<20:23,  2.33it/s, v_num=1, train_loss_step=7.24e+6, train_loss_epoch=7.45e+6]Epoch 147/3000:   5%|▍         | 147/3000 [01:05<20:05,  2.37it/s, v_num=1, train_loss_step=7.24e+6, train_loss_epoch=7.45e+6]Epoch 147/3000:   5%|▍         | 147/3000 [01:05<20:05,  2.37it/s, v_num=1, train_loss_step=7.09e+6, train_loss_epoch=7.41e+6]Epoch 148/3000:   5%|▍         | 147/3000 [01:05<20:05,  2.37it/s, v_num=1, train_loss_step=7.09e+6, train_loss_epoch=7.41e+6]Epoch 148/3000:   5%|▍         | 148/3000 [01:06<19:33,  2.43it/s, v_num=1, train_loss_step=7.09e+6, train_loss_epoch=7.41e+6]Epoch 148/3000:   5%|▍         | 148/3000 [01:06<19:33,  2.43it/s, v_num=1, train_loss_step=7.88e+6, train_loss_epoch=7.38e+6]Epoch 149/3000:   5%|▍         | 148/3000 [01:06<19:33,  2.43it/s, v_num=1, train_loss_step=7.88e+6, train_loss_epoch=7.38e+6]Epoch 149/3000:   5%|▍         | 149/3000 [01:06<19:33,  2.43it/s, v_num=1, train_loss_step=7.88e+6, train_loss_epoch=7.38e+6]Epoch 149/3000:   5%|▍         | 149/3000 [01:06<19:33,  2.43it/s, v_num=1, train_loss_step=7.64e+6, train_loss_epoch=7.35e+6]Epoch 150/3000:   5%|▍         | 149/3000 [01:06<19:33,  2.43it/s, v_num=1, train_loss_step=7.64e+6, train_loss_epoch=7.35e+6]Epoch 
150/3000:   5%|▌         | 150/3000 [01:07<20:01,  2.37it/s, v_num=1, train_loss_step=7.64e+6, train_loss_epoch=7.35e+6]Epoch 150/3000:   5%|▌         | 150/3000 [01:07<20:01,  2.37it/s, v_num=1, train_loss_step=7.15e+6, train_loss_epoch=7.31e+6]Epoch 151/3000:   5%|▌         | 150/3000 [01:07<20:01,  2.37it/s, v_num=1, train_loss_step=7.15e+6, train_loss_epoch=7.31e+6]Epoch 151/3000:   5%|▌         | 151/3000 [01:07<20:02,  2.37it/s, v_num=1, train_loss_step=7.15e+6, train_loss_epoch=7.31e+6]Epoch 151/3000:   5%|▌         | 151/3000 [01:07<20:02,  2.37it/s, v_num=1, train_loss_step=7.3e+6, train_loss_epoch=7.28e+6] Epoch 152/3000:   5%|▌         | 151/3000 [01:07<20:02,  2.37it/s, v_num=1, train_loss_step=7.3e+6, train_loss_epoch=7.28e+6]Epoch 152/3000:   5%|▌         | 152/3000 [01:07<19:59,  2.37it/s, v_num=1, train_loss_step=7.3e+6, train_loss_epoch=7.28e+6]Epoch 152/3000:   5%|▌         | 152/3000 [01:07<19:59,  2.37it/s, v_num=1, train_loss_step=8.12e+6, train_loss_epoch=7.25e+6]Epoch 153/3000:   5%|▌         | 152/3000 [01:07<19:59,  2.37it/s, v_num=1, train_loss_step=8.12e+6, train_loss_epoch=7.25e+6]Epoch 153/3000:   5%|▌         | 153/3000 [01:08<19:40,  2.41it/s, v_num=1, train_loss_step=8.12e+6, train_loss_epoch=7.25e+6]Epoch 153/3000:   5%|▌         | 153/3000 [01:08<19:40,  2.41it/s, v_num=1, train_loss_step=7.6e+6, train_loss_epoch=7.22e+6] Epoch 154/3000:   5%|▌         | 153/3000 [01:08<19:40,  2.41it/s, v_num=1, train_loss_step=7.6e+6, train_loss_epoch=7.22e+6]Epoch 154/3000:   5%|▌         | 154/3000 [01:08<19:43,  2.40it/s, v_num=1, train_loss_step=7.6e+6, train_loss_epoch=7.22e+6]Epoch 154/3000:   5%|▌         | 154/3000 [01:08<19:43,  2.40it/s, v_num=1, train_loss_step=7.55e+6, train_loss_epoch=7.18e+6]Epoch 155/3000:   5%|▌         | 154/3000 [01:08<19:43,  2.40it/s, v_num=1, train_loss_step=7.55e+6, train_loss_epoch=7.18e+6]Epoch 155/3000:   5%|▌         | 155/3000 [01:09<19:57,  2.38it/s, v_num=1, train_loss_step=7.55e+6, 
train_loss_epoch=7.18e+6]Epoch 155/3000:   5%|▌         | 155/3000 [01:09<19:57,  2.38it/s, v_num=1, train_loss_step=7.42e+6, train_loss_epoch=7.15e+6]Epoch 156/3000:   5%|▌         | 155/3000 [01:09<19:57,  2.38it/s, v_num=1, train_loss_step=7.42e+6, train_loss_epoch=7.15e+6]Epoch 156/3000:   5%|▌         | 156/3000 [01:09<19:18,  2.46it/s, v_num=1, train_loss_step=7.42e+6, train_loss_epoch=7.15e+6]Epoch 156/3000:   5%|▌         | 156/3000 [01:09<19:18,  2.46it/s, v_num=1, train_loss_step=7.57e+6, train_loss_epoch=7.12e+6]Epoch 157/3000:   5%|▌         | 156/3000 [01:09<19:18,  2.46it/s, v_num=1, train_loss_step=7.57e+6, train_loss_epoch=7.12e+6]Epoch 157/3000:   5%|▌         | 157/3000 [01:09<18:25,  2.57it/s, v_num=1, train_loss_step=7.57e+6, train_loss_epoch=7.12e+6]Epoch 157/3000:   5%|▌         | 157/3000 [01:09<18:25,  2.57it/s, v_num=1, train_loss_step=7.03e+6, train_loss_epoch=7.09e+6]Epoch 158/3000:   5%|▌         | 157/3000 [01:09<18:25,  2.57it/s, v_num=1, train_loss_step=7.03e+6, train_loss_epoch=7.09e+6]Epoch 158/3000:   5%|▌         | 158/3000 [01:10<18:34,  2.55it/s, v_num=1, train_loss_step=7.03e+6, train_loss_epoch=7.09e+6]Epoch 158/3000:   5%|▌         | 158/3000 [01:10<18:34,  2.55it/s, v_num=1, train_loss_step=6.62e+6, train_loss_epoch=7.06e+6]Epoch 159/3000:   5%|▌         | 158/3000 [01:10<18:34,  2.55it/s, v_num=1, train_loss_step=6.62e+6, train_loss_epoch=7.06e+6]Epoch 159/3000:   5%|▌         | 159/3000 [01:10<19:30,  2.43it/s, v_num=1, train_loss_step=6.62e+6, train_loss_epoch=7.06e+6]Epoch 159/3000:   5%|▌         | 159/3000 [01:10<19:30,  2.43it/s, v_num=1, train_loss_step=7.24e+6, train_loss_epoch=7.03e+6]Epoch 160/3000:   5%|▌         | 159/3000 [01:10<19:30,  2.43it/s, v_num=1, train_loss_step=7.24e+6, train_loss_epoch=7.03e+6]Epoch 160/3000:   5%|▌         | 160/3000 [01:11<21:01,  2.25it/s, v_num=1, train_loss_step=7.24e+6, train_loss_epoch=7.03e+6]Epoch 160/3000:   5%|▌         | 160/3000 [01:11<21:01,  2.25it/s, v_num=1, 
train_loss_step=6.25e+6, train_loss_epoch=7e+6]   Epoch 161/3000:   5%|▌         | 160/3000 [01:11<21:01,  2.25it/s, v_num=1, train_loss_step=6.25e+6, train_loss_epoch=7e+6]Epoch 161/3000:   5%|▌         | 161/3000 [01:11<21:11,  2.23it/s, v_num=1, train_loss_step=6.25e+6, train_loss_epoch=7e+6]Epoch 161/3000:   5%|▌         | 161/3000 [01:11<21:11,  2.23it/s, v_num=1, train_loss_step=6.98e+6, train_loss_epoch=6.97e+6]Epoch 162/3000:   5%|▌         | 161/3000 [01:11<21:11,  2.23it/s, v_num=1, train_loss_step=6.98e+6, train_loss_epoch=6.97e+6]Epoch 162/3000:   5%|▌         | 162/3000 [01:12<21:16,  2.22it/s, v_num=1, train_loss_step=6.98e+6, train_loss_epoch=6.97e+6]Epoch 162/3000:   5%|▌         | 162/3000 [01:12<21:16,  2.22it/s, v_num=1, train_loss_step=7.58e+6, train_loss_epoch=6.94e+6]Epoch 163/3000:   5%|▌         | 162/3000 [01:12<21:16,  2.22it/s, v_num=1, train_loss_step=7.58e+6, train_loss_epoch=6.94e+6]Epoch 163/3000:   5%|▌         | 163/3000 [01:12<21:18,  2.22it/s, v_num=1, train_loss_step=7.58e+6, train_loss_epoch=6.94e+6]Epoch 163/3000:   5%|▌         | 163/3000 [01:12<21:18,  2.22it/s, v_num=1, train_loss_step=7.15e+6, train_loss_epoch=6.91e+6]Epoch 164/3000:   5%|▌         | 163/3000 [01:12<21:18,  2.22it/s, v_num=1, train_loss_step=7.15e+6, train_loss_epoch=6.91e+6]Epoch 164/3000:   5%|▌         | 164/3000 [01:13<20:45,  2.28it/s, v_num=1, train_loss_step=7.15e+6, train_loss_epoch=6.91e+6]Epoch 164/3000:   5%|▌         | 164/3000 [01:13<20:45,  2.28it/s, v_num=1, train_loss_step=6.82e+6, train_loss_epoch=6.88e+6]Epoch 165/3000:   5%|▌         | 164/3000 [01:13<20:45,  2.28it/s, v_num=1, train_loss_step=6.82e+6, train_loss_epoch=6.88e+6]Epoch 165/3000:   6%|▌         | 165/3000 [01:13<20:18,  2.33it/s, v_num=1, train_loss_step=6.82e+6, train_loss_epoch=6.88e+6]Epoch 165/3000:   6%|▌         | 165/3000 [01:13<20:18,  2.33it/s, v_num=1, train_loss_step=7.13e+6, train_loss_epoch=6.85e+6]Epoch 166/3000:   6%|▌         | 165/3000 [01:13<20:18,  
2.33it/s, v_num=1, train_loss_step=7.13e+6, train_loss_epoch=6.85e+6]Epoch 166/3000:   6%|▌         | 166/3000 [01:13<20:41,  2.28it/s, v_num=1, train_loss_step=7.13e+6, train_loss_epoch=6.85e+6]Epoch 166/3000:   6%|▌         | 166/3000 [01:13<20:41,  2.28it/s, v_num=1, train_loss_step=6.63e+6, train_loss_epoch=6.82e+6]Epoch 167/3000:   6%|▌         | 166/3000 [01:13<20:41,  2.28it/s, v_num=1, train_loss_step=6.63e+6, train_loss_epoch=6.82e+6]Epoch 167/3000:   6%|▌         | 167/3000 [01:14<20:38,  2.29it/s, v_num=1, train_loss_step=6.63e+6, train_loss_epoch=6.82e+6]Epoch 167/3000:   6%|▌         | 167/3000 [01:14<20:38,  2.29it/s, v_num=1, train_loss_step=6.88e+6, train_loss_epoch=6.79e+6]Epoch 168/3000:   6%|▌         | 167/3000 [01:14<20:38,  2.29it/s, v_num=1, train_loss_step=6.88e+6, train_loss_epoch=6.79e+6]Epoch 168/3000:   6%|▌         | 168/3000 [01:14<20:30,  2.30it/s, v_num=1, train_loss_step=6.88e+6, train_loss_epoch=6.79e+6]Epoch 168/3000:   6%|▌         | 168/3000 [01:14<20:30,  2.30it/s, v_num=1, train_loss_step=6.94e+6, train_loss_epoch=6.76e+6]Epoch 169/3000:   6%|▌         | 168/3000 [01:14<20:30,  2.30it/s, v_num=1, train_loss_step=6.94e+6, train_loss_epoch=6.76e+6]Epoch 169/3000:   6%|▌         | 169/3000 [01:15<19:44,  2.39it/s, v_num=1, train_loss_step=6.94e+6, train_loss_epoch=6.76e+6]Epoch 169/3000:   6%|▌         | 169/3000 [01:15<19:44,  2.39it/s, v_num=1, train_loss_step=7.02e+6, train_loss_epoch=6.73e+6]Epoch 170/3000:   6%|▌         | 169/3000 [01:15<19:44,  2.39it/s, v_num=1, train_loss_step=7.02e+6, train_loss_epoch=6.73e+6]Epoch 170/3000:   6%|▌         | 170/3000 [01:15<18:53,  2.50it/s, v_num=1, train_loss_step=7.02e+6, train_loss_epoch=6.73e+6]Epoch 170/3000:   6%|▌         | 170/3000 [01:15<18:53,  2.50it/s, v_num=1, train_loss_step=6.92e+6, train_loss_epoch=6.71e+6]Epoch 171/3000:   6%|▌         | 170/3000 [01:15<18:53,  2.50it/s, v_num=1, train_loss_step=6.92e+6, train_loss_epoch=6.71e+6]Epoch 171/3000:   6%|▌         | 
171/3000 [01:16<20:56,  2.25it/s, v_num=1, train_loss_step=6.92e+6, train_loss_epoch=6.71e+6]Epoch 171/3000:   6%|▌         | 171/3000 [01:16<20:56,  2.25it/s, v_num=1, train_loss_step=7.51e+6, train_loss_epoch=6.68e+6]Epoch 172/3000:   6%|▌         | 171/3000 [01:16<20:56,  2.25it/s, v_num=1, train_loss_step=7.51e+6, train_loss_epoch=6.68e+6]Epoch 172/3000:   6%|▌         | 172/3000 [01:16<20:38,  2.28it/s, v_num=1, train_loss_step=7.51e+6, train_loss_epoch=6.68e+6]Epoch 172/3000:   6%|▌         | 172/3000 [01:16<20:38,  2.28it/s, v_num=1, train_loss_step=6.41e+6, train_loss_epoch=6.65e+6]Epoch 173/3000:   6%|▌         | 172/3000 [01:16<20:38,  2.28it/s, v_num=1, train_loss_step=6.41e+6, train_loss_epoch=6.65e+6]Epoch 173/3000:   6%|▌         | 173/3000 [01:17<21:22,  2.20it/s, v_num=1, train_loss_step=6.41e+6, train_loss_epoch=6.65e+6]Epoch 173/3000:   6%|▌         | 173/3000 [01:17<21:22,  2.20it/s, v_num=1, train_loss_step=7.37e+6, train_loss_epoch=6.62e+6]Epoch 174/3000:   6%|▌         | 173/3000 [01:17<21:22,  2.20it/s, v_num=1, train_loss_step=7.37e+6, train_loss_epoch=6.62e+6]Epoch 174/3000:   6%|▌         | 174/3000 [01:17<21:19,  2.21it/s, v_num=1, train_loss_step=7.37e+6, train_loss_epoch=6.62e+6]Epoch 174/3000:   6%|▌         | 174/3000 [01:17<21:19,  2.21it/s, v_num=1, train_loss_step=6.35e+6, train_loss_epoch=6.6e+6] Epoch 175/3000:   6%|▌         | 174/3000 [01:17<21:19,  2.21it/s, v_num=1, train_loss_step=6.35e+6, train_loss_epoch=6.6e+6]Epoch 175/3000:   6%|▌         | 175/3000 [01:17<20:05,  2.34it/s, v_num=1, train_loss_step=6.35e+6, train_loss_epoch=6.6e+6]Epoch 175/3000:   6%|▌         | 175/3000 [01:17<20:05,  2.34it/s, v_num=1, train_loss_step=6.64e+6, train_loss_epoch=6.57e+6]Epoch 176/3000:   6%|▌         | 175/3000 [01:17<20:05,  2.34it/s, v_num=1, train_loss_step=6.64e+6, train_loss_epoch=6.57e+6]Epoch 176/3000:   6%|▌         | 176/3000 [01:18<20:33,  2.29it/s, v_num=1, train_loss_step=6.64e+6, train_loss_epoch=6.57e+6]Epoch 176/3000:   
6%|▌         | 176/3000 [01:18<20:33,  2.29it/s, v_num=1, train_loss_step=6.64e+6, train_loss_epoch=6.54e+6]Epoch 177/3000:   6%|▌         | 176/3000 [01:18<20:33,  2.29it/s, v_num=1, train_loss_step=6.64e+6, train_loss_epoch=6.54e+6]Epoch 177/3000:   6%|▌         | 177/3000 [01:18<20:34,  2.29it/s, v_num=1, train_loss_step=6.64e+6, train_loss_epoch=6.54e+6]Epoch 177/3000:   6%|▌         | 177/3000 [01:18<20:34,  2.29it/s, v_num=1, train_loss_step=6.85e+6, train_loss_epoch=6.51e+6]Epoch 178/3000:   6%|▌         | 177/3000 [01:18<20:34,  2.29it/s, v_num=1, train_loss_step=6.85e+6, train_loss_epoch=6.51e+6]Epoch 178/3000:   6%|▌         | 178/3000 [01:19<20:35,  2.28it/s, v_num=1, train_loss_step=6.85e+6, train_loss_epoch=6.51e+6]Epoch 178/3000:   6%|▌         | 178/3000 [01:19<20:35,  2.28it/s, v_num=1, train_loss_step=6.92e+6, train_loss_epoch=6.49e+6]Epoch 179/3000:   6%|▌         | 178/3000 [01:19<20:35,  2.28it/s, v_num=1, train_loss_step=6.92e+6, train_loss_epoch=6.49e+6]Epoch 179/3000:   6%|▌         | 179/3000 [01:19<21:01,  2.24it/s, v_num=1, train_loss_step=6.92e+6, train_loss_epoch=6.49e+6]Epoch 179/3000:   6%|▌         | 179/3000 [01:19<21:01,  2.24it/s, v_num=1, train_loss_step=5.88e+6, train_loss_epoch=6.46e+6]Epoch 180/3000:   6%|▌         | 179/3000 [01:19<21:01,  2.24it/s, v_num=1, train_loss_step=5.88e+6, train_loss_epoch=6.46e+6]Epoch 180/3000:   6%|▌         | 180/3000 [01:19<19:06,  2.46it/s, v_num=1, train_loss_step=5.88e+6, train_loss_epoch=6.46e+6]Epoch 180/3000:   6%|▌         | 180/3000 [01:19<19:06,  2.46it/s, v_num=1, train_loss_step=6.16e+6, train_loss_epoch=6.44e+6]Epoch 181/3000:   6%|▌         | 180/3000 [01:19<19:06,  2.46it/s, v_num=1, train_loss_step=6.16e+6, train_loss_epoch=6.44e+6]Epoch 181/3000:   6%|▌         | 181/3000 [01:20<19:46,  2.38it/s, v_num=1, train_loss_step=6.16e+6, train_loss_epoch=6.44e+6]Epoch 181/3000:   6%|▌         | 181/3000 [01:20<19:46,  2.38it/s, v_num=1, train_loss_step=6.04e+6, 
train_loss_epoch=6.41e+6]Epoch 182/3000:   6%|▌         | 181/3000 [01:20<19:46,  2.38it/s, v_num=1, train_loss_step=6.04e+6, train_loss_epoch=6.41e+6]Epoch 182/3000:   6%|▌         | 182/3000 [01:20<20:33,  2.28it/s, v_num=1, train_loss_step=6.04e+6, train_loss_epoch=6.41e+6]Epoch 182/3000:   6%|▌         | 182/3000 [01:20<20:33,  2.28it/s, v_num=1, train_loss_step=6.87e+6, train_loss_epoch=6.38e+6]Epoch 183/3000:   6%|▌         | 182/3000 [01:20<20:33,  2.28it/s, v_num=1, train_loss_step=6.87e+6, train_loss_epoch=6.38e+6]Epoch 183/3000:   6%|▌         | 183/3000 [01:21<21:03,  2.23it/s, v_num=1, train_loss_step=6.87e+6, train_loss_epoch=6.38e+6]Epoch 183/3000:   6%|▌         | 183/3000 [01:21<21:03,  2.23it/s, v_num=1, train_loss_step=6.97e+6, train_loss_epoch=6.36e+6]Epoch 184/3000:   6%|▌         | 183/3000 [01:21<21:03,  2.23it/s, v_num=1, train_loss_step=6.97e+6, train_loss_epoch=6.36e+6]Epoch 184/3000:   6%|▌         | 184/3000 [01:21<20:39,  2.27it/s, v_num=1, train_loss_step=6.97e+6, train_loss_epoch=6.36e+6]Epoch 184/3000:   6%|▌         | 184/3000 [01:21<20:39,  2.27it/s, v_num=1, train_loss_step=6.29e+6, train_loss_epoch=6.33e+6]Epoch 185/3000:   6%|▌         | 184/3000 [01:21<20:39,  2.27it/s, v_num=1, train_loss_step=6.29e+6, train_loss_epoch=6.33e+6]Epoch 185/3000:   6%|▌         | 185/3000 [01:22<20:23,  2.30it/s, v_num=1, train_loss_step=6.29e+6, train_loss_epoch=6.33e+6]Epoch 185/3000:   6%|▌         | 185/3000 [01:22<20:23,  2.30it/s, v_num=1, train_loss_step=6.01e+6, train_loss_epoch=6.31e+6]Epoch 186/3000:   6%|▌         | 185/3000 [01:22<20:23,  2.30it/s, v_num=1, train_loss_step=6.01e+6, train_loss_epoch=6.31e+6]Epoch 186/3000:   6%|▌         | 186/3000 [01:22<19:42,  2.38it/s, v_num=1, train_loss_step=6.01e+6, train_loss_epoch=6.31e+6]Epoch 186/3000:   6%|▌         | 186/3000 [01:22<19:42,  2.38it/s, v_num=1, train_loss_step=5.99e+6, train_loss_epoch=6.28e+6]Epoch 187/3000:   6%|▌         | 186/3000 [01:22<19:42,  2.38it/s, v_num=1, 
train_loss_step=5.99e+6, train_loss_epoch=6.28e+6]Epoch 187/3000:   6%|▌         | 187/3000 [01:22<19:07,  2.45it/s, v_num=1, train_loss_step=5.99e+6, train_loss_epoch=6.28e+6]Epoch 187/3000:   6%|▌         | 187/3000 [01:22<19:07,  2.45it/s, v_num=1, train_loss_step=5.83e+6, train_loss_epoch=6.26e+6]Epoch 188/3000:   6%|▌         | 187/3000 [01:22<19:07,  2.45it/s, v_num=1, train_loss_step=5.83e+6, train_loss_epoch=6.26e+6]Epoch 188/3000:   6%|▋         | 188/3000 [01:23<20:48,  2.25it/s, v_num=1, train_loss_step=5.83e+6, train_loss_epoch=6.26e+6]Epoch 188/3000:   6%|▋         | 188/3000 [01:23<20:48,  2.25it/s, v_num=1, train_loss_step=6.06e+6, train_loss_epoch=6.23e+6]Epoch 189/3000:   6%|▋         | 188/3000 [01:23<20:48,  2.25it/s, v_num=1, train_loss_step=6.06e+6, train_loss_epoch=6.23e+6]Epoch 189/3000:   6%|▋         | 189/3000 [01:23<20:37,  2.27it/s, v_num=1, train_loss_step=6.06e+6, train_loss_epoch=6.23e+6]Epoch 189/3000:   6%|▋         | 189/3000 [01:23<20:37,  2.27it/s, v_num=1, train_loss_step=6.17e+6, train_loss_epoch=6.21e+6]Epoch 190/3000:   6%|▋         | 189/3000 [01:23<20:37,  2.27it/s, v_num=1, train_loss_step=6.17e+6, train_loss_epoch=6.21e+6]Epoch 190/3000:   6%|▋         | 190/3000 [01:24<20:55,  2.24it/s, v_num=1, train_loss_step=6.17e+6, train_loss_epoch=6.21e+6]Epoch 190/3000:   6%|▋         | 190/3000 [01:24<20:55,  2.24it/s, v_num=1, train_loss_step=5.38e+6, train_loss_epoch=6.19e+6]Epoch 191/3000:   6%|▋         | 190/3000 [01:24<20:55,  2.24it/s, v_num=1, train_loss_step=5.38e+6, train_loss_epoch=6.19e+6]Epoch 191/3000:   6%|▋         | 191/3000 [01:24<20:38,  2.27it/s, v_num=1, train_loss_step=5.38e+6, train_loss_epoch=6.19e+6]Epoch 191/3000:   6%|▋         | 191/3000 [01:24<20:38,  2.27it/s, v_num=1, train_loss_step=6.27e+6, train_loss_epoch=6.16e+6]Epoch 192/3000:   6%|▋         | 191/3000 [01:24<20:38,  2.27it/s, v_num=1, train_loss_step=6.27e+6, train_loss_epoch=6.16e+6]Epoch 192/3000:   6%|▋         | 192/3000 [01:25<20:23,  
2.30it/s, v_num=1, train_loss_step=6.27e+6, train_loss_epoch=6.16e+6]Epoch 192/3000:   6%|▋         | 192/3000 [01:25<20:23,  2.30it/s, v_num=1, train_loss_step=5.39e+6, train_loss_epoch=6.14e+6]Epoch 193/3000:   6%|▋         | 192/3000 [01:25<20:23,  2.30it/s, v_num=1, train_loss_step=5.39e+6, train_loss_epoch=6.14e+6]Epoch 193/3000:   6%|▋         | 193/3000 [01:25<19:43,  2.37it/s, v_num=1, train_loss_step=5.39e+6, train_loss_epoch=6.14e+6]Epoch 193/3000:   6%|▋         | 193/3000 [01:25<19:43,  2.37it/s, v_num=1, train_loss_step=6.63e+6, train_loss_epoch=6.11e+6]Epoch 194/3000:   6%|▋         | 193/3000 [01:25<19:43,  2.37it/s, v_num=1, train_loss_step=6.63e+6, train_loss_epoch=6.11e+6]Epoch 194/3000:   6%|▋         | 194/3000 [01:26<20:06,  2.33it/s, v_num=1, train_loss_step=6.63e+6, train_loss_epoch=6.11e+6]Epoch 194/3000:   6%|▋         | 194/3000 [01:26<20:06,  2.33it/s, v_num=1, train_loss_step=6.27e+6, train_loss_epoch=6.09e+6]Epoch 195/3000:   6%|▋         | 194/3000 [01:26<20:06,  2.33it/s, v_num=1, train_loss_step=6.27e+6, train_loss_epoch=6.09e+6]Epoch 195/3000:   6%|▋         | 195/3000 [01:26<21:04,  2.22it/s, v_num=1, train_loss_step=6.27e+6, train_loss_epoch=6.09e+6]Epoch 195/3000:   6%|▋         | 195/3000 [01:26<21:04,  2.22it/s, v_num=1, train_loss_step=5.68e+6, train_loss_epoch=6.07e+6]Epoch 196/3000:   6%|▋         | 195/3000 [01:26<21:04,  2.22it/s, v_num=1, train_loss_step=5.68e+6, train_loss_epoch=6.07e+6]Epoch 196/3000:   7%|▋         | 196/3000 [01:27<21:46,  2.15it/s, v_num=1, train_loss_step=5.68e+6, train_loss_epoch=6.07e+6]Epoch 196/3000:   7%|▋         | 196/3000 [01:27<21:46,  2.15it/s, v_num=1, train_loss_step=6.45e+6, train_loss_epoch=6.04e+6]Epoch 197/3000:   7%|▋         | 196/3000 [01:27<21:46,  2.15it/s, v_num=1, train_loss_step=6.45e+6, train_loss_epoch=6.04e+6]Epoch 197/3000:   7%|▋         | 197/3000 [01:27<21:03,  2.22it/s, v_num=1, train_loss_step=6.45e+6, train_loss_epoch=6.04e+6]Epoch 197/3000:   7%|▋         | 
197/3000 [01:27<21:03,  2.22it/s, v_num=1, train_loss_step=6.41e+6, train_loss_epoch=6.02e+6]Epoch 198/3000:   7%|▋         | 197/3000 [01:27<21:03,  2.22it/s, v_num=1, train_loss_step=6.41e+6, train_loss_epoch=6.02e+6]Epoch 198/3000:   7%|▋         | 198/3000 [01:27<20:41,  2.26it/s, v_num=1, train_loss_step=6.41e+6, train_loss_epoch=6.02e+6]Epoch 198/3000:   7%|▋         | 198/3000 [01:27<20:41,  2.26it/s, v_num=1, train_loss_step=6.48e+6, train_loss_epoch=6e+6]   Epoch 199/3000:   7%|▋         | 198/3000 [01:27<20:41,  2.26it/s, v_num=1, train_loss_step=6.48e+6, train_loss_epoch=6e+6]Epoch 199/3000:   7%|▋         | 199/3000 [01:28<20:43,  2.25it/s, v_num=1, train_loss_step=6.48e+6, train_loss_epoch=6e+6]Epoch 199/3000:   7%|▋         | 199/3000 [01:28<20:43,  2.25it/s, v_num=1, train_loss_step=5.95e+6, train_loss_epoch=5.98e+6]Epoch 200/3000:   7%|▋         | 199/3000 [01:28<20:43,  2.25it/s, v_num=1, train_loss_step=5.95e+6, train_loss_epoch=5.98e+6]Epoch 200/3000:   7%|▋         | 200/3000 [01:28<20:43,  2.25it/s, v_num=1, train_loss_step=5.95e+6, train_loss_epoch=5.98e+6]Epoch 200/3000:   7%|▋         | 200/3000 [01:28<20:43,  2.25it/s, v_num=1, train_loss_step=6.47e+6, train_loss_epoch=5.95e+6]Epoch 201/3000:   7%|▋         | 200/3000 [01:28<20:43,  2.25it/s, v_num=1, train_loss_step=6.47e+6, train_loss_epoch=5.95e+6]Epoch 201/3000:   7%|▋         | 201/3000 [01:29<20:30,  2.27it/s, v_num=1, train_loss_step=6.47e+6, train_loss_epoch=5.95e+6]Epoch 201/3000:   7%|▋         | 201/3000 [01:29<20:30,  2.27it/s, v_num=1, train_loss_step=5.66e+6, train_loss_epoch=5.93e+6]Epoch 202/3000:   7%|▋         | 201/3000 [01:29<20:30,  2.27it/s, v_num=1, train_loss_step=5.66e+6, train_loss_epoch=5.93e+6]Epoch 202/3000:   7%|▋         | 202/3000 [01:29<20:21,  2.29it/s, v_num=1, train_loss_step=5.66e+6, train_loss_epoch=5.93e+6]Epoch 202/3000:   7%|▋         | 202/3000 [01:29<20:21,  2.29it/s, v_num=1, train_loss_step=5.79e+6, train_loss_epoch=5.91e+6]Epoch 203/3000:   7%|▋ 
        | 202/3000 [01:29<20:21,  2.29it/s, v_num=1, train_loss_step=5.79e+6, train_loss_epoch=5.91e+6]Epoch 203/3000:   7%|▋         | 203/3000 [01:30<20:28,  2.28it/s, v_num=1, train_loss_step=5.79e+6, train_loss_epoch=5.91e+6]Epoch 203/3000:   7%|▋         | 203/3000 [01:30<20:28,  2.28it/s, v_num=1, train_loss_step=6.16e+6, train_loss_epoch=5.89e+6]Epoch 204/3000:   7%|▋         | 203/3000 [01:30<20:28,  2.28it/s, v_num=1, train_loss_step=6.16e+6, train_loss_epoch=5.89e+6]Epoch 204/3000:   7%|▋         | 204/3000 [01:30<20:45,  2.24it/s, v_num=1, train_loss_step=6.16e+6, train_loss_epoch=5.89e+6]Epoch 204/3000:   7%|▋         | 204/3000 [01:30<20:45,  2.24it/s, v_num=1, train_loss_step=6.3e+6, train_loss_epoch=5.86e+6] Epoch 205/3000:   7%|▋         | 204/3000 [01:30<20:45,  2.24it/s, v_num=1, train_loss_step=6.3e+6, train_loss_epoch=5.86e+6]Epoch 205/3000:   7%|▋         | 205/3000 [01:31<20:39,  2.26it/s, v_num=1, train_loss_step=6.3e+6, train_loss_epoch=5.86e+6]Epoch 205/3000:   7%|▋         | 205/3000 [01:31<20:39,  2.26it/s, v_num=1, train_loss_step=6.15e+6, train_loss_epoch=5.84e+6]Epoch 206/3000:   7%|▋         | 205/3000 [01:31<20:39,  2.26it/s, v_num=1, train_loss_step=6.15e+6, train_loss_epoch=5.84e+6]Epoch 206/3000:   7%|▋         | 206/3000 [01:31<20:41,  2.25it/s, v_num=1, train_loss_step=6.15e+6, train_loss_epoch=5.84e+6]Epoch 206/3000:   7%|▋         | 206/3000 [01:31<20:41,  2.25it/s, v_num=1, train_loss_step=5.8e+6, train_loss_epoch=5.82e+6] Epoch 207/3000:   7%|▋         | 206/3000 [01:31<20:41,  2.25it/s, v_num=1, train_loss_step=5.8e+6, train_loss_epoch=5.82e+6]Epoch 207/3000:   7%|▋         | 207/3000 [01:31<20:44,  2.24it/s, v_num=1, train_loss_step=5.8e+6, train_loss_epoch=5.82e+6]Epoch 207/3000:   7%|▋         | 207/3000 [01:31<20:44,  2.24it/s, v_num=1, train_loss_step=6.35e+6, train_loss_epoch=5.8e+6]Epoch 208/3000:   7%|▋         | 207/3000 [01:31<20:44,  2.24it/s, v_num=1, train_loss_step=6.35e+6, train_loss_epoch=5.8e+6]Epoch 
208/3000:   7%|▋         | 208/3000 [01:32<20:05,  2.32it/s, v_num=1, train_loss_step=6.35e+6, train_loss_epoch=5.8e+6]Epoch 208/3000:   7%|▋         | 208/3000 [01:32<20:05,  2.32it/s, v_num=1, train_loss_step=5.89e+6, train_loss_epoch=5.78e+6]Epoch 209/3000:   7%|▋         | 208/3000 [01:32<20:05,  2.32it/s, v_num=1, train_loss_step=5.89e+6, train_loss_epoch=5.78e+6]Epoch 209/3000:   7%|▋         | 209/3000 [01:32<19:31,  2.38it/s, v_num=1, train_loss_step=5.89e+6, train_loss_epoch=5.78e+6]Epoch 209/3000:   7%|▋         | 209/3000 [01:32<19:31,  2.38it/s, v_num=1, train_loss_step=5.56e+6, train_loss_epoch=5.76e+6]Epoch 210/3000:   7%|▋         | 209/3000 [01:32<19:31,  2.38it/s, v_num=1, train_loss_step=5.56e+6, train_loss_epoch=5.76e+6]Epoch 210/3000:   7%|▋         | 210/3000 [01:33<20:16,  2.29it/s, v_num=1, train_loss_step=5.56e+6, train_loss_epoch=5.76e+6]Epoch 210/3000:   7%|▋         | 210/3000 [01:33<20:16,  2.29it/s, v_num=1, train_loss_step=5.61e+6, train_loss_epoch=5.74e+6]Epoch 211/3000:   7%|▋         | 210/3000 [01:33<20:16,  2.29it/s, v_num=1, train_loss_step=5.61e+6, train_loss_epoch=5.74e+6]Epoch 211/3000:   7%|▋         | 211/3000 [01:33<20:46,  2.24it/s, v_num=1, train_loss_step=5.61e+6, train_loss_epoch=5.74e+6]Epoch 211/3000:   7%|▋         | 211/3000 [01:33<20:46,  2.24it/s, v_num=1, train_loss_step=5.37e+6, train_loss_epoch=5.71e+6]Epoch 212/3000:   7%|▋         | 211/3000 [01:33<20:46,  2.24it/s, v_num=1, train_loss_step=5.37e+6, train_loss_epoch=5.71e+6]Epoch 212/3000:   7%|▋         | 212/3000 [01:34<21:03,  2.21it/s, v_num=1, train_loss_step=5.37e+6, train_loss_epoch=5.71e+6]Epoch 212/3000:   7%|▋         | 212/3000 [01:34<21:03,  2.21it/s, v_num=1, train_loss_step=5.97e+6, train_loss_epoch=5.69e+6]Epoch 213/3000:   7%|▋         | 212/3000 [01:34<21:03,  2.21it/s, v_num=1, train_loss_step=5.97e+6, train_loss_epoch=5.69e+6]Epoch 213/3000:   7%|▋         | 213/3000 [01:34<20:32,  2.26it/s, v_num=1, train_loss_step=5.97e+6, 
train_loss_epoch=5.69e+6]Epoch 213/3000:   7%|▋         | 213/3000 [01:34<20:32,  2.26it/s, v_num=1, train_loss_step=5.13e+6, train_loss_epoch=5.67e+6]Epoch 214/3000:   7%|▋         | 213/3000 [01:34<20:32,  2.26it/s, v_num=1, train_loss_step=5.13e+6, train_loss_epoch=5.67e+6]Epoch 214/3000:   7%|▋         | 214/3000 [01:34<19:53,  2.33it/s, v_num=1, train_loss_step=5.13e+6, train_loss_epoch=5.67e+6]Epoch 214/3000:   7%|▋         | 214/3000 [01:34<19:53,  2.33it/s, v_num=1, train_loss_step=5.97e+6, train_loss_epoch=5.65e+6]Epoch 215/3000:   7%|▋         | 214/3000 [01:34<19:53,  2.33it/s, v_num=1, train_loss_step=5.97e+6, train_loss_epoch=5.65e+6]Epoch 215/3000:   7%|▋         | 215/3000 [01:35<20:24,  2.27it/s, v_num=1, train_loss_step=5.97e+6, train_loss_epoch=5.65e+6]Epoch 215/3000:   7%|▋         | 215/3000 [01:35<20:24,  2.27it/s, v_num=1, train_loss_step=5.84e+6, train_loss_epoch=5.63e+6]Epoch 216/3000:   7%|▋         | 215/3000 [01:35<20:24,  2.27it/s, v_num=1, train_loss_step=5.84e+6, train_loss_epoch=5.63e+6]Epoch 216/3000:   7%|▋         | 216/3000 [01:35<20:21,  2.28it/s, v_num=1, train_loss_step=5.84e+6, train_loss_epoch=5.63e+6]Epoch 216/3000:   7%|▋         | 216/3000 [01:35<20:21,  2.28it/s, v_num=1, train_loss_step=5.13e+6, train_loss_epoch=5.61e+6]Epoch 217/3000:   7%|▋         | 216/3000 [01:35<20:21,  2.28it/s, v_num=1, train_loss_step=5.13e+6, train_loss_epoch=5.61e+6]Epoch 217/3000:   7%|▋         | 217/3000 [01:36<20:34,  2.26it/s, v_num=1, train_loss_step=5.13e+6, train_loss_epoch=5.61e+6]Epoch 217/3000:   7%|▋         | 217/3000 [01:36<20:34,  2.26it/s, v_num=1, train_loss_step=5.56e+6, train_loss_epoch=5.59e+6]Epoch 218/3000:   7%|▋         | 217/3000 [01:36<20:34,  2.26it/s, v_num=1, train_loss_step=5.56e+6, train_loss_epoch=5.59e+6]Epoch 218/3000:   7%|▋         | 218/3000 [01:36<19:39,  2.36it/s, v_num=1, train_loss_step=5.56e+6, train_loss_epoch=5.59e+6]Epoch 218/3000:   7%|▋         | 218/3000 [01:36<19:39,  2.36it/s, v_num=1, 
train_loss_step=5.64e+6, train_loss_epoch=5.57e+6]Epoch 219/3000:   7%|▋         | 218/3000 [01:36<19:39,  2.36it/s, v_num=1, train_loss_step=5.64e+6, train_loss_epoch=5.57e+6]Epoch 219/3000:   7%|▋         | 219/3000 [01:37<20:18,  2.28it/s, v_num=1, train_loss_step=5.64e+6, train_loss_epoch=5.57e+6]Epoch 219/3000:   7%|▋         | 219/3000 [01:37<20:18,  2.28it/s, v_num=1, train_loss_step=5.5e+6, train_loss_epoch=5.55e+6] Epoch 220/3000:   7%|▋         | 219/3000 [01:37<20:18,  2.28it/s, v_num=1, train_loss_step=5.5e+6, train_loss_epoch=5.55e+6]Epoch 220/3000:   7%|▋         | 220/3000 [01:37<20:04,  2.31it/s, v_num=1, train_loss_step=5.5e+6, train_loss_epoch=5.55e+6]Epoch 220/3000:   7%|▋         | 220/3000 [01:37<20:04,  2.31it/s, v_num=1, train_loss_step=5.95e+6, train_loss_epoch=5.53e+6]Epoch 221/3000:   7%|▋         | 220/3000 [01:37<20:04,  2.31it/s, v_num=1, train_loss_step=5.95e+6, train_loss_epoch=5.53e+6]Epoch 221/3000:   7%|▋         | 221/3000 [01:37<19:20,  2.39it/s, v_num=1, train_loss_step=5.95e+6, train_loss_epoch=5.53e+6]Epoch 221/3000:   7%|▋         | 221/3000 [01:37<19:20,  2.39it/s, v_num=1, train_loss_step=5.69e+6, train_loss_epoch=5.51e+6]Epoch 222/3000:   7%|▋         | 221/3000 [01:37<19:20,  2.39it/s, v_num=1, train_loss_step=5.69e+6, train_loss_epoch=5.51e+6]Epoch 222/3000:   7%|▋         | 222/3000 [01:38<19:40,  2.35it/s, v_num=1, train_loss_step=5.69e+6, train_loss_epoch=5.51e+6]Epoch 222/3000:   7%|▋         | 222/3000 [01:38<19:40,  2.35it/s, v_num=1, train_loss_step=5.46e+6, train_loss_epoch=5.49e+6]Epoch 223/3000:   7%|▋         | 222/3000 [01:38<19:40,  2.35it/s, v_num=1, train_loss_step=5.46e+6, train_loss_epoch=5.49e+6]Epoch 223/3000:   7%|▋         | 223/3000 [01:38<18:24,  2.51it/s, v_num=1, train_loss_step=5.46e+6, train_loss_epoch=5.49e+6]Epoch 223/3000:   7%|▋         | 223/3000 [01:38<18:24,  2.51it/s, v_num=1, train_loss_step=5.34e+6, train_loss_epoch=5.48e+6]Epoch 224/3000:   7%|▋         | 223/3000 [01:38<18:24,  
2.51it/s, v_num=1, train_loss_step=5.34e+6, train_loss_epoch=5.48e+6]Epoch 224/3000:   7%|▋         | 224/3000 [01:39<18:29,  2.50it/s, v_num=1, train_loss_step=5.34e+6, train_loss_epoch=5.48e+6]Epoch 224/3000:   7%|▋         | 224/3000 [01:39<18:29,  2.50it/s, v_num=1, train_loss_step=5.24e+6, train_loss_epoch=5.46e+6]Epoch 225/3000:   7%|▋         | 224/3000 [01:39<18:29,  2.50it/s, v_num=1, train_loss_step=5.24e+6, train_loss_epoch=5.46e+6]Epoch 225/3000:   8%|▊         | 225/3000 [01:39<17:57,  2.58it/s, v_num=1, train_loss_step=5.24e+6, train_loss_epoch=5.46e+6]Epoch 225/3000:   8%|▊         | 225/3000 [01:39<17:57,  2.58it/s, v_num=1, train_loss_step=5.54e+6, train_loss_epoch=5.44e+6]Epoch 226/3000:   8%|▊         | 225/3000 [01:39<17:57,  2.58it/s, v_num=1, train_loss_step=5.54e+6, train_loss_epoch=5.44e+6]Epoch 226/3000:   8%|▊         | 226/3000 [01:39<18:17,  2.53it/s, v_num=1, train_loss_step=5.54e+6, train_loss_epoch=5.44e+6]Epoch 226/3000:   8%|▊         | 226/3000 [01:39<18:17,  2.53it/s, v_num=1, train_loss_step=5.67e+6, train_loss_epoch=5.42e+6]Epoch 227/3000:   8%|▊         | 226/3000 [01:39<18:17,  2.53it/s, v_num=1, train_loss_step=5.67e+6, train_loss_epoch=5.42e+6]Epoch 227/3000:   8%|▊         | 227/3000 [01:40<18:46,  2.46it/s, v_num=1, train_loss_step=5.67e+6, train_loss_epoch=5.42e+6]Epoch 227/3000:   8%|▊         | 227/3000 [01:40<18:46,  2.46it/s, v_num=1, train_loss_step=5.43e+6, train_loss_epoch=5.4e+6] Epoch 228/3000:   8%|▊         | 227/3000 [01:40<18:46,  2.46it/s, v_num=1, train_loss_step=5.43e+6, train_loss_epoch=5.4e+6]Epoch 228/3000:   8%|▊         | 228/3000 [01:40<18:44,  2.47it/s, v_num=1, train_loss_step=5.43e+6, train_loss_epoch=5.4e+6]Epoch 228/3000:   8%|▊         | 228/3000 [01:40<18:44,  2.47it/s, v_num=1, train_loss_step=5.17e+6, train_loss_epoch=5.38e+6]Epoch 229/3000:   8%|▊         | 228/3000 [01:40<18:44,  2.47it/s, v_num=1, train_loss_step=5.17e+6, train_loss_epoch=5.38e+6]Epoch 229/3000:   8%|▊         | 229/3000 
[01:41<19:29,  2.37it/s, v_num=1, train_loss_step=5.17e+6, train_loss_epoch=5.38e+6]Epoch 229/3000:   8%|▊         | 229/3000 [01:41<19:29,  2.37it/s, v_num=1, train_loss_step=5.49e+6, train_loss_epoch=5.36e+6]Epoch 230/3000:   8%|▊         | 229/3000 [01:41<19:29,  2.37it/s, v_num=1, train_loss_step=5.49e+6, train_loss_epoch=5.36e+6]Epoch 230/3000:   8%|▊         | 230/3000 [01:41<20:12,  2.28it/s, v_num=1, train_loss_step=5.49e+6, train_loss_epoch=5.36e+6]Epoch 230/3000:   8%|▊         | 230/3000 [01:41<20:12,  2.28it/s, v_num=1, train_loss_step=4.9e+6, train_loss_epoch=5.34e+6] Epoch 231/3000:   8%|▊         | 230/3000 [01:41<20:12,  2.28it/s, v_num=1, train_loss_step=4.9e+6, train_loss_epoch=5.34e+6]Epoch 231/3000:   8%|▊         | 231/3000 [01:42<20:47,  2.22it/s, v_num=1, train_loss_step=4.9e+6, train_loss_epoch=5.34e+6]Epoch 231/3000:   8%|▊         | 231/3000 [01:42<20:47,  2.22it/s, v_num=1, train_loss_step=5.49e+6, train_loss_epoch=5.33e+6]Epoch 232/3000:   8%|▊         | 231/3000 [01:42<20:47,  2.22it/s, v_num=1, train_loss_step=5.49e+6, train_loss_epoch=5.33e+6]Epoch 232/3000:   8%|▊         | 232/3000 [01:42<21:04,  2.19it/s, v_num=1, train_loss_step=5.49e+6, train_loss_epoch=5.33e+6]Epoch 232/3000:   8%|▊         | 232/3000 [01:42<21:04,  2.19it/s, v_num=1, train_loss_step=5.24e+6, train_loss_epoch=5.31e+6]Epoch 233/3000:   8%|▊         | 232/3000 [01:42<21:04,  2.19it/s, v_num=1, train_loss_step=5.24e+6, train_loss_epoch=5.31e+6]Epoch 233/3000:   8%|▊         | 233/3000 [01:43<20:29,  2.25it/s, v_num=1, train_loss_step=5.24e+6, train_loss_epoch=5.31e+6]Epoch 233/3000:   8%|▊         | 233/3000 [01:43<20:29,  2.25it/s, v_num=1, train_loss_step=5.52e+6, train_loss_epoch=5.29e+6]Epoch 234/3000:   8%|▊         | 233/3000 [01:43<20:29,  2.25it/s, v_num=1, train_loss_step=5.52e+6, train_loss_epoch=5.29e+6]Epoch 234/3000:   8%|▊         | 234/3000 [01:43<20:19,  2.27it/s, v_num=1, train_loss_step=5.52e+6, train_loss_epoch=5.29e+6]Epoch 234/3000:   8%|▊      
   | 234/3000 [01:43<20:19,  2.27it/s, v_num=1, train_loss_step=5.08e+6, train_loss_epoch=5.27e+6]Epoch 235/3000:   8%|▊         | 234/3000 [01:43<20:19,  2.27it/s, v_num=1, train_loss_step=5.08e+6, train_loss_epoch=5.27e+6]Epoch 235/3000:   8%|▊         | 235/3000 [01:43<19:19,  2.38it/s, v_num=1, train_loss_step=5.08e+6, train_loss_epoch=5.27e+6]Epoch 235/3000:   8%|▊         | 235/3000 [01:43<19:19,  2.38it/s, v_num=1, train_loss_step=5.76e+6, train_loss_epoch=5.25e+6]Epoch 236/3000:   8%|▊         | 235/3000 [01:43<19:19,  2.38it/s, v_num=1, train_loss_step=5.76e+6, train_loss_epoch=5.25e+6]Epoch 236/3000:   8%|▊         | 236/3000 [01:44<20:21,  2.26it/s, v_num=1, train_loss_step=5.76e+6, train_loss_epoch=5.25e+6]Epoch 236/3000:   8%|▊         | 236/3000 [01:44<20:21,  2.26it/s, v_num=1, train_loss_step=5.59e+6, train_loss_epoch=5.24e+6]Epoch 237/3000:   8%|▊         | 236/3000 [01:44<20:21,  2.26it/s, v_num=1, train_loss_step=5.59e+6, train_loss_epoch=5.24e+6]Epoch 237/3000:   8%|▊         | 237/3000 [01:44<21:33,  2.14it/s, v_num=1, train_loss_step=5.59e+6, train_loss_epoch=5.24e+6]Epoch 237/3000:   8%|▊         | 237/3000 [01:44<21:33,  2.14it/s, v_num=1, train_loss_step=5.26e+6, train_loss_epoch=5.22e+6]Epoch 238/3000:   8%|▊         | 237/3000 [01:44<21:33,  2.14it/s, v_num=1, train_loss_step=5.26e+6, train_loss_epoch=5.22e+6]Epoch 238/3000:   8%|▊         | 238/3000 [01:45<21:03,  2.19it/s, v_num=1, train_loss_step=5.26e+6, train_loss_epoch=5.22e+6]Epoch 238/3000:   8%|▊         | 238/3000 [01:45<21:03,  2.19it/s, v_num=1, train_loss_step=5.44e+6, train_loss_epoch=5.2e+6] Epoch 239/3000:   8%|▊         | 238/3000 [01:45<21:03,  2.19it/s, v_num=1, train_loss_step=5.44e+6, train_loss_epoch=5.2e+6]Epoch 239/3000:   8%|▊         | 239/3000 [01:45<20:34,  2.24it/s, v_num=1, train_loss_step=5.44e+6, train_loss_epoch=5.2e+6]Epoch 239/3000:   8%|▊         | 239/3000 [01:45<20:34,  2.24it/s, v_num=1, train_loss_step=5.06e+6, train_loss_epoch=5.18e+6]Epoch 
240/3000:   8%|▊         | 239/3000 [01:45<20:34,  2.24it/s, v_num=1, train_loss_step=5.06e+6, train_loss_epoch=5.18e+6]Epoch 240/3000:   8%|▊         | 240/3000 [01:46<21:16,  2.16it/s, v_num=1, train_loss_step=5.06e+6, train_loss_epoch=5.18e+6]Epoch 240/3000:   8%|▊         | 240/3000 [01:46<21:16,  2.16it/s, v_num=1, train_loss_step=5.4e+6, train_loss_epoch=5.17e+6] Epoch 241/3000:   8%|▊         | 240/3000 [01:46<21:16,  2.16it/s, v_num=1, train_loss_step=5.4e+6, train_loss_epoch=5.17e+6]Epoch 241/3000:   8%|▊         | 241/3000 [01:46<21:02,  2.19it/s, v_num=1, train_loss_step=5.4e+6, train_loss_epoch=5.17e+6]Epoch 241/3000:   8%|▊         | 241/3000 [01:46<21:02,  2.19it/s, v_num=1, train_loss_step=5.58e+6, train_loss_epoch=5.15e+6]Epoch 242/3000:   8%|▊         | 241/3000 [01:46<21:02,  2.19it/s, v_num=1, train_loss_step=5.58e+6, train_loss_epoch=5.15e+6]Epoch 242/3000:   8%|▊         | 242/3000 [01:47<20:33,  2.24it/s, v_num=1, train_loss_step=5.58e+6, train_loss_epoch=5.15e+6]Epoch 242/3000:   8%|▊         | 242/3000 [01:47<20:33,  2.24it/s, v_num=1, train_loss_step=5.35e+6, train_loss_epoch=5.13e+6]Epoch 243/3000:   8%|▊         | 242/3000 [01:47<20:33,  2.24it/s, v_num=1, train_loss_step=5.35e+6, train_loss_epoch=5.13e+6]Epoch 243/3000:   8%|▊         | 243/3000 [01:47<19:16,  2.38it/s, v_num=1, train_loss_step=5.35e+6, train_loss_epoch=5.13e+6]Epoch 243/3000:   8%|▊         | 243/3000 [01:47<19:16,  2.38it/s, v_num=1, train_loss_step=5.53e+6, train_loss_epoch=5.11e+6]Epoch 244/3000:   8%|▊         | 243/3000 [01:47<19:16,  2.38it/s, v_num=1, train_loss_step=5.53e+6, train_loss_epoch=5.11e+6]Epoch 244/3000:   8%|▊         | 244/3000 [01:47<20:07,  2.28it/s, v_num=1, train_loss_step=5.53e+6, train_loss_epoch=5.11e+6]Epoch 244/3000:   8%|▊         | 244/3000 [01:47<20:07,  2.28it/s, v_num=1, train_loss_step=4.79e+6, train_loss_epoch=5.1e+6] Epoch 245/3000:   8%|▊         | 244/3000 [01:47<20:07,  2.28it/s, v_num=1, train_loss_step=4.79e+6, 
train_loss_epoch=5.1e+6]Epoch 245/3000:   8%|▊         | 245/3000 [01:48<20:22,  2.25it/s, v_num=1, train_loss_step=4.79e+6, train_loss_epoch=5.1e+6]Epoch 245/3000:   8%|▊         | 245/3000 [01:48<20:22,  2.25it/s, v_num=1, train_loss_step=5.11e+6, train_loss_epoch=5.08e+6]Epoch 246/3000:   8%|▊         | 245/3000 [01:48<20:22,  2.25it/s, v_num=1, train_loss_step=5.11e+6, train_loss_epoch=5.08e+6]Epoch 246/3000:   8%|▊         | 246/3000 [01:48<20:18,  2.26it/s, v_num=1, train_loss_step=5.11e+6, train_loss_epoch=5.08e+6]Epoch 246/3000:   8%|▊         | 246/3000 [01:48<20:18,  2.26it/s, v_num=1, train_loss_step=5.02e+6, train_loss_epoch=5.06e+6]Epoch 247/3000:   8%|▊         | 246/3000 [01:48<20:18,  2.26it/s, v_num=1, train_loss_step=5.02e+6, train_loss_epoch=5.06e+6]Epoch 247/3000:   8%|▊         | 247/3000 [01:49<19:31,  2.35it/s, v_num=1, train_loss_step=5.02e+6, train_loss_epoch=5.06e+6]Epoch 247/3000:   8%|▊         | 247/3000 [01:49<19:31,  2.35it/s, v_num=1, train_loss_step=5.1e+6, train_loss_epoch=5.05e+6] Epoch 248/3000:   8%|▊         | 247/3000 [01:49<19:31,  2.35it/s, v_num=1, train_loss_step=5.1e+6, train_loss_epoch=5.05e+6]Epoch 248/3000:   8%|▊         | 248/3000 [01:49<19:33,  2.35it/s, v_num=1, train_loss_step=5.1e+6, train_loss_epoch=5.05e+6]Epoch 248/3000:   8%|▊         | 248/3000 [01:49<19:33,  2.35it/s, v_num=1, train_loss_step=4.82e+6, train_loss_epoch=5.03e+6]Epoch 249/3000:   8%|▊         | 248/3000 [01:49<19:33,  2.35it/s, v_num=1, train_loss_step=4.82e+6, train_loss_epoch=5.03e+6]Epoch 249/3000:   8%|▊         | 249/3000 [01:50<19:32,  2.35it/s, v_num=1, train_loss_step=4.82e+6, train_loss_epoch=5.03e+6]Epoch 249/3000:   8%|▊         | 249/3000 [01:50<19:32,  2.35it/s, v_num=1, train_loss_step=4.84e+6, train_loss_epoch=5.02e+6]Epoch 250/3000:   8%|▊         | 249/3000 [01:50<19:32,  2.35it/s, v_num=1, train_loss_step=4.84e+6, train_loss_epoch=5.02e+6]Epoch 250/3000:   8%|▊         | 250/3000 [01:50<19:35,  2.34it/s, v_num=1, 
train_loss_step=4.84e+6, train_loss_epoch=5.02e+6]Epoch 250/3000:   8%|▊         | 250/3000 [01:50<19:35,  2.34it/s, v_num=1, train_loss_step=5.18e+6, train_loss_epoch=5e+6]   Epoch 251/3000:   8%|▊         | 250/3000 [01:50<19:35,  2.34it/s, v_num=1, train_loss_step=5.18e+6, train_loss_epoch=5e+6]Epoch 251/3000:   8%|▊         | 251/3000 [01:50<19:49,  2.31it/s, v_num=1, train_loss_step=5.18e+6, train_loss_epoch=5e+6]Epoch 251/3000:   8%|▊         | 251/3000 [01:50<19:49,  2.31it/s, v_num=1, train_loss_step=4.82e+6, train_loss_epoch=4.98e+6]Epoch 252/3000:   8%|▊         | 251/3000 [01:50<19:49,  2.31it/s, v_num=1, train_loss_step=4.82e+6, train_loss_epoch=4.98e+6]Epoch 252/3000:   8%|▊         | 252/3000 [01:51<19:59,  2.29it/s, v_num=1, train_loss_step=4.82e+6, train_loss_epoch=4.98e+6]Epoch 252/3000:   8%|▊         | 252/3000 [01:51<19:59,  2.29it/s, v_num=1, train_loss_step=5.12e+6, train_loss_epoch=4.97e+6]Epoch 253/3000:   8%|▊         | 252/3000 [01:51<19:59,  2.29it/s, v_num=1, train_loss_step=5.12e+6, train_loss_epoch=4.97e+6]Epoch 253/3000:   8%|▊         | 253/3000 [01:51<19:53,  2.30it/s, v_num=1, train_loss_step=5.12e+6, train_loss_epoch=4.97e+6]Epoch 253/3000:   8%|▊         | 253/3000 [01:51<19:53,  2.30it/s, v_num=1, train_loss_step=4.88e+6, train_loss_epoch=4.95e+6]Epoch 254/3000:   8%|▊         | 253/3000 [01:51<19:53,  2.30it/s, v_num=1, train_loss_step=4.88e+6, train_loss_epoch=4.95e+6]Epoch 254/3000:   8%|▊         | 254/3000 [01:52<19:40,  2.33it/s, v_num=1, train_loss_step=4.88e+6, train_loss_epoch=4.95e+6]Epoch 254/3000:   8%|▊         | 254/3000 [01:52<19:40,  2.33it/s, v_num=1, train_loss_step=4.86e+6, train_loss_epoch=4.94e+6]Epoch 255/3000:   8%|▊         | 254/3000 [01:52<19:40,  2.33it/s, v_num=1, train_loss_step=4.86e+6, train_loss_epoch=4.94e+6]Epoch 255/3000:   8%|▊         | 255/3000 [01:52<19:30,  2.35it/s, v_num=1, train_loss_step=4.86e+6, train_loss_epoch=4.94e+6]Epoch 255/3000:   8%|▊         | 255/3000 [01:52<19:30,  
2.35it/s, v_num=1, train_loss_step=5.18e+6, train_loss_epoch=4.92e+6]Epoch 256/3000:   8%|▊         | 255/3000 [01:52<19:30,  2.35it/s, v_num=1, train_loss_step=5.18e+6, train_loss_epoch=4.92e+6]Epoch 256/3000:   9%|▊         | 256/3000 [01:53<19:39,  2.33it/s, v_num=1, train_loss_step=5.18e+6, train_loss_epoch=4.92e+6]Epoch 256/3000:   9%|▊         | 256/3000 [01:53<19:39,  2.33it/s, v_num=1, train_loss_step=4.72e+6, train_loss_epoch=4.9e+6] Epoch 257/3000:   9%|▊         | 256/3000 [01:53<19:39,  2.33it/s, v_num=1, train_loss_step=4.72e+6, train_loss_epoch=4.9e+6]Epoch 257/3000:   9%|▊         | 257/3000 [01:53<19:59,  2.29it/s, v_num=1, train_loss_step=4.72e+6, train_loss_epoch=4.9e+6]Epoch 257/3000:   9%|▊         | 257/3000 [01:53<19:59,  2.29it/s, v_num=1, train_loss_step=4.89e+6, train_loss_epoch=4.89e+6]Epoch 258/3000:   9%|▊         | 257/3000 [01:53<19:59,  2.29it/s, v_num=1, train_loss_step=4.89e+6, train_loss_epoch=4.89e+6]Epoch 258/3000:   9%|▊         | 258/3000 [01:53<19:18,  2.37it/s, v_num=1, train_loss_step=4.89e+6, train_loss_epoch=4.89e+6]Epoch 258/3000:   9%|▊         | 258/3000 [01:53<19:18,  2.37it/s, v_num=1, train_loss_step=4.72e+6, train_loss_epoch=4.87e+6]Epoch 259/3000:   9%|▊         | 258/3000 [01:53<19:18,  2.37it/s, v_num=1, train_loss_step=4.72e+6, train_loss_epoch=4.87e+6]Epoch 259/3000:   9%|▊         | 259/3000 [01:54<19:45,  2.31it/s, v_num=1, train_loss_step=4.72e+6, train_loss_epoch=4.87e+6]Epoch 259/3000:   9%|▊         | 259/3000 [01:54<19:45,  2.31it/s, v_num=1, train_loss_step=4.71e+6, train_loss_epoch=4.86e+6]Epoch 260/3000:   9%|▊         | 259/3000 [01:54<19:45,  2.31it/s, v_num=1, train_loss_step=4.71e+6, train_loss_epoch=4.86e+6]Epoch 260/3000:   9%|▊         | 260/3000 [01:54<18:57,  2.41it/s, v_num=1, train_loss_step=4.71e+6, train_loss_epoch=4.86e+6]Epoch 260/3000:   9%|▊         | 260/3000 [01:54<18:57,  2.41it/s, v_num=1, train_loss_step=4.98e+6, train_loss_epoch=4.84e+6]Epoch 261/3000:   9%|▊         | 260/3000 
[01:54<18:57,  2.41it/s, v_num=1, train_loss_step=4.98e+6, train_loss_epoch=4.84e+6]Epoch 261/3000:   9%|▊         | 261/3000 [01:55<18:33,  2.46it/s, v_num=1, train_loss_step=4.98e+6, train_loss_epoch=4.84e+6]Epoch 261/3000:   9%|▊         | 261/3000 [01:55<18:33,  2.46it/s, v_num=1, train_loss_step=4.88e+6, train_loss_epoch=4.83e+6]Epoch 262/3000:   9%|▊         | 261/3000 [01:55<18:33,  2.46it/s, v_num=1, train_loss_step=4.88e+6, train_loss_epoch=4.83e+6]Epoch 262/3000:   9%|▊         | 262/3000 [01:55<19:40,  2.32it/s, v_num=1, train_loss_step=4.88e+6, train_loss_epoch=4.83e+6]Epoch 262/3000:   9%|▊         | 262/3000 [01:55<19:40,  2.32it/s, v_num=1, train_loss_step=4.41e+6, train_loss_epoch=4.81e+6]Epoch 263/3000:   9%|▊         | 262/3000 [01:55<19:40,  2.32it/s, v_num=1, train_loss_step=4.41e+6, train_loss_epoch=4.81e+6]Epoch 263/3000:   9%|▉         | 263/3000 [01:56<19:59,  2.28it/s, v_num=1, train_loss_step=4.41e+6, train_loss_epoch=4.81e+6]Epoch 263/3000:   9%|▉         | 263/3000 [01:56<19:59,  2.28it/s, v_num=1, train_loss_step=5.17e+6, train_loss_epoch=4.8e+6] Epoch 264/3000:   9%|▉         | 263/3000 [01:56<19:59,  2.28it/s, v_num=1, train_loss_step=5.17e+6, train_loss_epoch=4.8e+6]Epoch 264/3000:   9%|▉         | 264/3000 [01:56<20:21,  2.24it/s, v_num=1, train_loss_step=5.17e+6, train_loss_epoch=4.8e+6]Epoch 264/3000:   9%|▉         | 264/3000 [01:56<20:21,  2.24it/s, v_num=1, train_loss_step=4.42e+6, train_loss_epoch=4.78e+6]Epoch 265/3000:   9%|▉         | 264/3000 [01:56<20:21,  2.24it/s, v_num=1, train_loss_step=4.42e+6, train_loss_epoch=4.78e+6]Epoch 265/3000:   9%|▉         | 265/3000 [01:56<20:07,  2.26it/s, v_num=1, train_loss_step=4.42e+6, train_loss_epoch=4.78e+6]Epoch 265/3000:   9%|▉         | 265/3000 [01:56<20:07,  2.26it/s, v_num=1, train_loss_step=5.1e+6, train_loss_epoch=4.77e+6] Epoch 266/3000:   9%|▉         | 265/3000 [01:56<20:07,  2.26it/s, v_num=1, train_loss_step=5.1e+6, train_loss_epoch=4.77e+6]Epoch 266/3000:   9%|▉       
  | 266/3000 [01:57<19:22,  2.35it/s, v_num=1, train_loss_step=5.1e+6, train_loss_epoch=4.77e+6]Epoch 266/3000:   9%|▉         | 266/3000 [01:57<19:22,  2.35it/s, v_num=1, train_loss_step=4.9e+6, train_loss_epoch=4.75e+6]Epoch 267/3000:   9%|▉         | 266/3000 [01:57<19:22,  2.35it/s, v_num=1, train_loss_step=4.9e+6, train_loss_epoch=4.75e+6]Epoch 267/3000:   9%|▉         | 267/3000 [01:57<19:35,  2.33it/s, v_num=1, train_loss_step=4.9e+6, train_loss_epoch=4.75e+6]Epoch 267/3000:   9%|▉         | 267/3000 [01:57<19:35,  2.33it/s, v_num=1, train_loss_step=4.94e+6, train_loss_epoch=4.74e+6]Epoch 268/3000:   9%|▉         | 267/3000 [01:57<19:35,  2.33it/s, v_num=1, train_loss_step=4.94e+6, train_loss_epoch=4.74e+6]Epoch 268/3000:   9%|▉         | 268/3000 [01:58<20:21,  2.24it/s, v_num=1, train_loss_step=4.94e+6, train_loss_epoch=4.74e+6]Epoch 268/3000:   9%|▉         | 268/3000 [01:58<20:21,  2.24it/s, v_num=1, train_loss_step=4.41e+6, train_loss_epoch=4.72e+6]Epoch 269/3000:   9%|▉         | 268/3000 [01:58<20:21,  2.24it/s, v_num=1, train_loss_step=4.41e+6, train_loss_epoch=4.72e+6]Epoch 269/3000:   9%|▉         | 269/3000 [01:58<20:14,  2.25it/s, v_num=1, train_loss_step=4.41e+6, train_loss_epoch=4.72e+6]Epoch 269/3000:   9%|▉         | 269/3000 [01:58<20:14,  2.25it/s, v_num=1, train_loss_step=4.31e+6, train_loss_epoch=4.71e+6]Epoch 270/3000:   9%|▉         | 269/3000 [01:58<20:14,  2.25it/s, v_num=1, train_loss_step=4.31e+6, train_loss_epoch=4.71e+6]Epoch 270/3000:   9%|▉         | 270/3000 [01:59<18:47,  2.42it/s, v_num=1, train_loss_step=4.31e+6, train_loss_epoch=4.71e+6]Epoch 270/3000:   9%|▉         | 270/3000 [01:59<18:47,  2.42it/s, v_num=1, train_loss_step=4.82e+6, train_loss_epoch=4.7e+6] Epoch 271/3000:   9%|▉         | 270/3000 [01:59<18:47,  2.42it/s, v_num=1, train_loss_step=4.82e+6, train_loss_epoch=4.7e+6]Epoch 271/3000:   9%|▉         | 271/3000 [01:59<18:54,  2.41it/s, v_num=1, train_loss_step=4.82e+6, train_loss_epoch=4.7e+6]Epoch 271/3000:   
9%|▉         | 271/3000 [01:59<18:54,  2.41it/s, v_num=1, train_loss_step=4.35e+6, train_loss_epoch=4.68e+6]Epoch 272/3000:   9%|▉         | 271/3000 [01:59<18:54,  2.41it/s, v_num=1, train_loss_step=4.35e+6, train_loss_epoch=4.68e+6]Epoch 272/3000:   9%|▉         | 272/3000 [01:59<18:43,  2.43it/s, v_num=1, train_loss_step=4.35e+6, train_loss_epoch=4.68e+6]Epoch 272/3000:   9%|▉         | 272/3000 [01:59<18:43,  2.43it/s, v_num=1, train_loss_step=4.37e+6, train_loss_epoch=4.67e+6]Epoch 273/3000:   9%|▉         | 272/3000 [01:59<18:43,  2.43it/s, v_num=1, train_loss_step=4.37e+6, train_loss_epoch=4.67e+6]Epoch 273/3000:   9%|▉         | 273/3000 [02:00<18:45,  2.42it/s, v_num=1, train_loss_step=4.37e+6, train_loss_epoch=4.67e+6]Epoch 273/3000:   9%|▉         | 273/3000 [02:00<18:45,  2.42it/s, v_num=1, train_loss_step=4.56e+6, train_loss_epoch=4.65e+6]Epoch 274/3000:   9%|▉         | 273/3000 [02:00<18:45,  2.42it/s, v_num=1, train_loss_step=4.56e+6, train_loss_epoch=4.65e+6]Epoch 274/3000:   9%|▉         | 274/3000 [02:00<19:26,  2.34it/s, v_num=1, train_loss_step=4.56e+6, train_loss_epoch=4.65e+6]Epoch 274/3000:   9%|▉         | 274/3000 [02:00<19:26,  2.34it/s, v_num=1, train_loss_step=4.76e+6, train_loss_epoch=4.64e+6]Epoch 275/3000:   9%|▉         | 274/3000 [02:00<19:26,  2.34it/s, v_num=1, train_loss_step=4.76e+6, train_loss_epoch=4.64e+6]Epoch 275/3000:   9%|▉         | 275/3000 [02:01<18:31,  2.45it/s, v_num=1, train_loss_step=4.76e+6, train_loss_epoch=4.64e+6]Epoch 275/3000:   9%|▉         | 275/3000 [02:01<18:31,  2.45it/s, v_num=1, train_loss_step=4.23e+6, train_loss_epoch=4.63e+6]Epoch 276/3000:   9%|▉         | 275/3000 [02:01<18:31,  2.45it/s, v_num=1, train_loss_step=4.23e+6, train_loss_epoch=4.63e+6]Epoch 276/3000:   9%|▉         | 276/3000 [02:01<19:10,  2.37it/s, v_num=1, train_loss_step=4.23e+6, train_loss_epoch=4.63e+6]Epoch 276/3000:   9%|▉         | 276/3000 [02:01<19:10,  2.37it/s, v_num=1, train_loss_step=4.5e+6, train_loss_epoch=4.61e+6] 
Epoch 277/3000:   9%|▉         | 276/3000 [02:01<19:10,  2.37it/s, v_num=1, train_loss_step=4.5e+6, train_loss_epoch=4.61e+6]Epoch 277/3000:   9%|▉         | 277/3000 [02:02<19:22,  2.34it/s, v_num=1, train_loss_step=4.5e+6, train_loss_epoch=4.61e+6]Epoch 277/3000:   9%|▉         | 277/3000 [02:02<19:22,  2.34it/s, v_num=1, train_loss_step=4.55e+6, train_loss_epoch=4.6e+6]Epoch 278/3000:   9%|▉         | 277/3000 [02:02<19:22,  2.34it/s, v_num=1, train_loss_step=4.55e+6, train_loss_epoch=4.6e+6]Epoch 278/3000:   9%|▉         | 278/3000 [02:02<18:49,  2.41it/s, v_num=1, train_loss_step=4.55e+6, train_loss_epoch=4.6e+6]Epoch 278/3000:   9%|▉         | 278/3000 [02:02<18:49,  2.41it/s, v_num=1, train_loss_step=4.79e+6, train_loss_epoch=4.58e+6]Epoch 279/3000:   9%|▉         | 278/3000 [02:02<18:49,  2.41it/s, v_num=1, train_loss_step=4.79e+6, train_loss_epoch=4.58e+6]Epoch 279/3000:   9%|▉         | 279/3000 [02:02<18:33,  2.44it/s, v_num=1, train_loss_step=4.79e+6, train_loss_epoch=4.58e+6]Epoch 279/3000:   9%|▉         | 279/3000 [02:02<18:33,  2.44it/s, v_num=1, train_loss_step=5.01e+6, train_loss_epoch=4.57e+6]Epoch 280/3000:   9%|▉         | 279/3000 [02:02<18:33,  2.44it/s, v_num=1, train_loss_step=5.01e+6, train_loss_epoch=4.57e+6]Epoch 280/3000:   9%|▉         | 280/3000 [02:03<18:45,  2.42it/s, v_num=1, train_loss_step=5.01e+6, train_loss_epoch=4.57e+6]Epoch 280/3000:   9%|▉         | 280/3000 [02:03<18:45,  2.42it/s, v_num=1, train_loss_step=4.35e+6, train_loss_epoch=4.56e+6]Epoch 281/3000:   9%|▉         | 280/3000 [02:03<18:45,  2.42it/s, v_num=1, train_loss_step=4.35e+6, train_loss_epoch=4.56e+6]Epoch 281/3000:   9%|▉         | 281/3000 [02:03<19:05,  2.37it/s, v_num=1, train_loss_step=4.35e+6, train_loss_epoch=4.56e+6]Epoch 281/3000:   9%|▉         | 281/3000 [02:03<19:05,  2.37it/s, v_num=1, train_loss_step=4.9e+6, train_loss_epoch=4.54e+6] Epoch 282/3000:   9%|▉         | 281/3000 [02:03<19:05,  2.37it/s, v_num=1, train_loss_step=4.9e+6, 
train_loss_epoch=4.54e+6]Epoch 282/3000:   9%|▉         | 282/3000 [02:04<18:41,  2.42it/s, v_num=1, train_loss_step=4.9e+6, train_loss_epoch=4.54e+6]Epoch 282/3000:   9%|▉         | 282/3000 [02:04<18:41,  2.42it/s, v_num=1, train_loss_step=4.45e+6, train_loss_epoch=4.53e+6]Epoch 283/3000:   9%|▉         | 282/3000 [02:04<18:41,  2.42it/s, v_num=1, train_loss_step=4.45e+6, train_loss_epoch=4.53e+6]Epoch 283/3000:   9%|▉         | 283/3000 [02:04<19:26,  2.33it/s, v_num=1, train_loss_step=4.45e+6, train_loss_epoch=4.53e+6]Epoch 283/3000:   9%|▉         | 283/3000 [02:04<19:26,  2.33it/s, v_num=1, train_loss_step=4.45e+6, train_loss_epoch=4.52e+6]Epoch 284/3000:   9%|▉         | 283/3000 [02:04<19:26,  2.33it/s, v_num=1, train_loss_step=4.45e+6, train_loss_epoch=4.52e+6]Epoch 284/3000:   9%|▉         | 284/3000 [02:04<18:18,  2.47it/s, v_num=1, train_loss_step=4.45e+6, train_loss_epoch=4.52e+6]Epoch 284/3000:   9%|▉         | 284/3000 [02:04<18:18,  2.47it/s, v_num=1, train_loss_step=4.53e+6, train_loss_epoch=4.5e+6] Epoch 285/3000:   9%|▉         | 284/3000 [02:04<18:18,  2.47it/s, v_num=1, train_loss_step=4.53e+6, train_loss_epoch=4.5e+6]Epoch 285/3000:  10%|▉         | 285/3000 [02:05<17:32,  2.58it/s, v_num=1, train_loss_step=4.53e+6, train_loss_epoch=4.5e+6]Epoch 285/3000:  10%|▉         | 285/3000 [02:05<17:32,  2.58it/s, v_num=1, train_loss_step=4.34e+6, train_loss_epoch=4.49e+6]Epoch 286/3000:  10%|▉         | 285/3000 [02:05<17:32,  2.58it/s, v_num=1, train_loss_step=4.34e+6, train_loss_epoch=4.49e+6]Epoch 286/3000:  10%|▉         | 286/3000 [02:05<17:47,  2.54it/s, v_num=1, train_loss_step=4.34e+6, train_loss_epoch=4.49e+6]Epoch 286/3000:  10%|▉         | 286/3000 [02:05<17:47,  2.54it/s, v_num=1, train_loss_step=4.61e+6, train_loss_epoch=4.48e+6]Epoch 287/3000:  10%|▉         | 286/3000 [02:05<17:47,  2.54it/s, v_num=1, train_loss_step=4.61e+6, train_loss_epoch=4.48e+6]Epoch 287/3000:  10%|▉         | 287/3000 [02:06<19:20,  2.34it/s, v_num=1, 
train_loss_step=4.61e+6, train_loss_epoch=4.48e+6]Epoch 287/3000:  10%|▉         | 287/3000 [02:06<19:20,  2.34it/s, v_num=1, train_loss_step=4.38e+6, train_loss_epoch=4.47e+6]Epoch 288/3000:  10%|▉         | 287/3000 [02:06<19:20,  2.34it/s, v_num=1, train_loss_step=4.38e+6, train_loss_epoch=4.47e+6]Epoch 288/3000:  10%|▉         | 288/3000 [02:06<20:17,  2.23it/s, v_num=1, train_loss_step=4.38e+6, train_loss_epoch=4.47e+6]Epoch 288/3000:  10%|▉         | 288/3000 [02:06<20:17,  2.23it/s, v_num=1, train_loss_step=4.35e+6, train_loss_epoch=4.45e+6]Epoch 289/3000:  10%|▉         | 288/3000 [02:06<20:17,  2.23it/s, v_num=1, train_loss_step=4.35e+6, train_loss_epoch=4.45e+6]Epoch 289/3000:  10%|▉         | 289/3000 [02:07<19:48,  2.28it/s, v_num=1, train_loss_step=4.35e+6, train_loss_epoch=4.45e+6]Epoch 289/3000:  10%|▉         | 289/3000 [02:07<19:48,  2.28it/s, v_num=1, train_loss_step=3.95e+6, train_loss_epoch=4.44e+6]Epoch 290/3000:  10%|▉         | 289/3000 [02:07<19:48,  2.28it/s, v_num=1, train_loss_step=3.95e+6, train_loss_epoch=4.44e+6]Epoch 290/3000:  10%|▉         | 290/3000 [02:07<19:50,  2.28it/s, v_num=1, train_loss_step=3.95e+6, train_loss_epoch=4.44e+6]Epoch 290/3000:  10%|▉         | 290/3000 [02:07<19:50,  2.28it/s, v_num=1, train_loss_step=4.27e+6, train_loss_epoch=4.43e+6]Epoch 291/3000:  10%|▉         | 290/3000 [02:07<19:50,  2.28it/s, v_num=1, train_loss_step=4.27e+6, train_loss_epoch=4.43e+6]Epoch 291/3000:  10%|▉         | 291/3000 [02:07<19:05,  2.37it/s, v_num=1, train_loss_step=4.27e+6, train_loss_epoch=4.43e+6]Epoch 291/3000:  10%|▉         | 291/3000 [02:07<19:05,  2.37it/s, v_num=1, train_loss_step=3.97e+6, train_loss_epoch=4.41e+6]Epoch 292/3000:  10%|▉         | 291/3000 [02:07<19:05,  2.37it/s, v_num=1, train_loss_step=3.97e+6, train_loss_epoch=4.41e+6]Epoch 292/3000:  10%|▉         | 292/3000 [02:08<20:26,  2.21it/s, v_num=1, train_loss_step=3.97e+6, train_loss_epoch=4.41e+6]Epoch 292/3000:  10%|▉         | 292/3000 [02:08<20:26,  
2.21it/s, v_num=1, train_loss_step=4.44e+6, train_loss_epoch=4.4e+6] Epoch 293/3000:  10%|▉         | 292/3000 [02:08<20:26,  2.21it/s, v_num=1, train_loss_step=4.44e+6, train_loss_epoch=4.4e+6]Epoch 293/3000:  10%|▉         | 293/3000 [02:08<20:35,  2.19it/s, v_num=1, train_loss_step=4.44e+6, train_loss_epoch=4.4e+6]Epoch 293/3000:  10%|▉         | 293/3000 [02:08<20:35,  2.19it/s, v_num=1, train_loss_step=4.46e+6, train_loss_epoch=4.39e+6]Epoch 294/3000:  10%|▉         | 293/3000 [02:08<20:35,  2.19it/s, v_num=1, train_loss_step=4.46e+6, train_loss_epoch=4.39e+6]Epoch 294/3000:  10%|▉         | 294/3000 [02:09<20:05,  2.24it/s, v_num=1, train_loss_step=4.46e+6, train_loss_epoch=4.39e+6]Epoch 294/3000:  10%|▉         | 294/3000 [02:09<20:05,  2.24it/s, v_num=1, train_loss_step=4.71e+6, train_loss_epoch=4.38e+6]Epoch 295/3000:  10%|▉         | 294/3000 [02:09<20:05,  2.24it/s, v_num=1, train_loss_step=4.71e+6, train_loss_epoch=4.38e+6]Epoch 295/3000:  10%|▉         | 295/3000 [02:09<19:21,  2.33it/s, v_num=1, train_loss_step=4.71e+6, train_loss_epoch=4.38e+6]Epoch 295/3000:  10%|▉         | 295/3000 [02:09<19:21,  2.33it/s, v_num=1, train_loss_step=4.42e+6, train_loss_epoch=4.36e+6]Epoch 296/3000:  10%|▉         | 295/3000 [02:09<19:21,  2.33it/s, v_num=1, train_loss_step=4.42e+6, train_loss_epoch=4.36e+6]Epoch 296/3000:  10%|▉         | 296/3000 [02:10<18:45,  2.40it/s, v_num=1, train_loss_step=4.42e+6, train_loss_epoch=4.36e+6]Epoch 296/3000:  10%|▉         | 296/3000 [02:10<18:45,  2.40it/s, v_num=1, train_loss_step=4.72e+6, train_loss_epoch=4.35e+6]Epoch 297/3000:  10%|▉         | 296/3000 [02:10<18:45,  2.40it/s, v_num=1, train_loss_step=4.72e+6, train_loss_epoch=4.35e+6]Epoch 297/3000:  10%|▉         | 297/3000 [02:10<19:38,  2.29it/s, v_num=1, train_loss_step=4.72e+6, train_loss_epoch=4.35e+6]Epoch 297/3000:  10%|▉         | 297/3000 [02:10<19:38,  2.29it/s, v_num=1, train_loss_step=4.62e+6, train_loss_epoch=4.34e+6]Epoch 298/3000:  10%|▉         | 297/3000 
[02:10<19:38,  2.29it/s, v_num=1, train_loss_step=4.62e+6, train_loss_epoch=4.34e+6]Epoch 298/3000:  10%|▉         | 298/3000 [02:10<19:23,  2.32it/s, v_num=1, train_loss_step=4.62e+6, train_loss_epoch=4.34e+6]Epoch 298/3000:  10%|▉         | 298/3000 [02:10<19:23,  2.32it/s, v_num=1, train_loss_step=4.52e+6, train_loss_epoch=4.33e+6]Epoch 299/3000:  10%|▉         | 298/3000 [02:10<19:23,  2.32it/s, v_num=1, train_loss_step=4.52e+6, train_loss_epoch=4.33e+6]Epoch 299/3000:  10%|▉         | 299/3000 [02:11<20:01,  2.25it/s, v_num=1, train_loss_step=4.52e+6, train_loss_epoch=4.33e+6]Epoch 299/3000:  10%|▉         | 299/3000 [02:11<20:01,  2.25it/s, v_num=1, train_loss_step=4.61e+6, train_loss_epoch=4.32e+6]Epoch 300/3000:  10%|▉         | 299/3000 [02:11<20:01,  2.25it/s, v_num=1, train_loss_step=4.61e+6, train_loss_epoch=4.32e+6]Epoch 300/3000:  10%|█         | 300/3000 [02:11<20:27,  2.20it/s, v_num=1, train_loss_step=4.61e+6, train_loss_epoch=4.32e+6]Epoch 300/3000:  10%|█         | 300/3000 [02:11<20:27,  2.20it/s, v_num=1, train_loss_step=4.24e+6, train_loss_epoch=4.3e+6] Epoch 301/3000:  10%|█         | 300/3000 [02:11<20:27,  2.20it/s, v_num=1, train_loss_step=4.24e+6, train_loss_epoch=4.3e+6]Epoch 301/3000:  10%|█         | 301/3000 [02:12<20:11,  2.23it/s, v_num=1, train_loss_step=4.24e+6, train_loss_epoch=4.3e+6]Epoch 301/3000:  10%|█         | 301/3000 [02:12<20:11,  2.23it/s, v_num=1, train_loss_step=4.18e+6, train_loss_epoch=4.29e+6]Epoch 302/3000:  10%|█         | 301/3000 [02:12<20:11,  2.23it/s, v_num=1, train_loss_step=4.18e+6, train_loss_epoch=4.29e+6]Epoch 302/3000:  10%|█         | 302/3000 [02:12<19:37,  2.29it/s, v_num=1, train_loss_step=4.18e+6, train_loss_epoch=4.29e+6]Epoch 302/3000:  10%|█         | 302/3000 [02:12<19:37,  2.29it/s, v_num=1, train_loss_step=3.96e+6, train_loss_epoch=4.28e+6]Epoch 303/3000:  10%|█         | 302/3000 [02:12<19:37,  2.29it/s, v_num=1, train_loss_step=3.96e+6, train_loss_epoch=4.28e+6]Epoch 303/3000:  10%|█      
   | 303/3000 [02:13<19:32,  2.30it/s, v_num=1, train_loss_step=3.96e+6, train_loss_epoch=4.28e+6]Epoch 303/3000:  10%|█         | 303/3000 [02:13<19:32,  2.30it/s, v_num=1, train_loss_step=4.09e+6, train_loss_epoch=4.27e+6]Epoch 304/3000:  10%|█         | 303/3000 [02:13<19:32,  2.30it/s, v_num=1, train_loss_step=4.09e+6, train_loss_epoch=4.27e+6]Epoch 304/3000:  10%|█         | 304/3000 [02:13<19:11,  2.34it/s, v_num=1, train_loss_step=4.09e+6, train_loss_epoch=4.27e+6]Epoch 304/3000:  10%|█         | 304/3000 [02:13<19:11,  2.34it/s, v_num=1, train_loss_step=4.22e+6, train_loss_epoch=4.26e+6]Epoch 305/3000:  10%|█         | 304/3000 [02:13<19:11,  2.34it/s, v_num=1, train_loss_step=4.22e+6, train_loss_epoch=4.26e+6]Epoch 305/3000:  10%|█         | 305/3000 [02:14<19:34,  2.29it/s, v_num=1, train_loss_step=4.22e+6, train_loss_epoch=4.26e+6]Epoch 305/3000:  10%|█         | 305/3000 [02:14<19:34,  2.29it/s, v_num=1, train_loss_step=4.1e+6, train_loss_epoch=4.24e+6] Epoch 306/3000:  10%|█         | 305/3000 [02:14<19:34,  2.29it/s, v_num=1, train_loss_step=4.1e+6, train_loss_epoch=4.24e+6]Epoch 306/3000:  10%|█         | 306/3000 [02:14<20:17,  2.21it/s, v_num=1, train_loss_step=4.1e+6, train_loss_epoch=4.24e+6]Epoch 306/3000:  10%|█         | 306/3000 [02:14<20:17,  2.21it/s, v_num=1, train_loss_step=3.97e+6, train_loss_epoch=4.23e+6]Epoch 307/3000:  10%|█         | 306/3000 [02:14<20:17,  2.21it/s, v_num=1, train_loss_step=3.97e+6, train_loss_epoch=4.23e+6]Epoch 307/3000:  10%|█         | 307/3000 [02:15<20:21,  2.20it/s, v_num=1, train_loss_step=3.97e+6, train_loss_epoch=4.23e+6]Epoch 307/3000:  10%|█         | 307/3000 [02:15<20:21,  2.20it/s, v_num=1, train_loss_step=4.45e+6, train_loss_epoch=4.22e+6]Epoch 308/3000:  10%|█         | 307/3000 [02:15<20:21,  2.20it/s, v_num=1, train_loss_step=4.45e+6, train_loss_epoch=4.22e+6]Epoch 308/3000:  10%|█         | 308/3000 [02:15<20:04,  2.23it/s, v_num=1, train_loss_step=4.45e+6, train_loss_epoch=4.22e+6]Epoch 
308/3000:  10%|█         | 308/3000 [02:15<20:04,  2.23it/s, v_num=1, train_loss_step=4.46e+6, train_loss_epoch=4.21e+6]Epoch 309/3000:  10%|█         | 308/3000 [02:15<20:04,  2.23it/s, v_num=1, train_loss_step=4.46e+6, train_loss_epoch=4.21e+6]Epoch 309/3000:  10%|█         | 309/3000 [02:15<20:10,  2.22it/s, v_num=1, train_loss_step=4.46e+6, train_loss_epoch=4.21e+6]Epoch 309/3000:  10%|█         | 309/3000 [02:15<20:10,  2.22it/s, v_num=1, train_loss_step=4.05e+6, train_loss_epoch=4.2e+6] Epoch 310/3000:  10%|█         | 309/3000 [02:15<20:10,  2.22it/s, v_num=1, train_loss_step=4.05e+6, train_loss_epoch=4.2e+6]Epoch 310/3000:  10%|█         | 310/3000 [02:16<19:52,  2.26it/s, v_num=1, train_loss_step=4.05e+6, train_loss_epoch=4.2e+6]Epoch 310/3000:  10%|█         | 310/3000 [02:16<19:52,  2.26it/s, v_num=1, train_loss_step=4.02e+6, train_loss_epoch=4.19e+6]Epoch 311/3000:  10%|█         | 310/3000 [02:16<19:52,  2.26it/s, v_num=1, train_loss_step=4.02e+6, train_loss_epoch=4.19e+6]Epoch 311/3000:  10%|█         | 311/3000 [02:16<19:34,  2.29it/s, v_num=1, train_loss_step=4.02e+6, train_loss_epoch=4.19e+6]Epoch 311/3000:  10%|█         | 311/3000 [02:16<19:34,  2.29it/s, v_num=1, train_loss_step=4.12e+6, train_loss_epoch=4.18e+6]Epoch 312/3000:  10%|█         | 311/3000 [02:16<19:34,  2.29it/s, v_num=1, train_loss_step=4.12e+6, train_loss_epoch=4.18e+6]Epoch 312/3000:  10%|█         | 312/3000 [02:17<20:16,  2.21it/s, v_num=1, train_loss_step=4.12e+6, train_loss_epoch=4.18e+6]Epoch 312/3000:  10%|█         | 312/3000 [02:17<20:16,  2.21it/s, v_num=1, train_loss_step=4.59e+6, train_loss_epoch=4.16e+6]Epoch 313/3000:  10%|█         | 312/3000 [02:17<20:16,  2.21it/s, v_num=1, train_loss_step=4.59e+6, train_loss_epoch=4.16e+6]Epoch 313/3000:  10%|█         | 313/3000 [02:17<18:59,  2.36it/s, v_num=1, train_loss_step=4.59e+6, train_loss_epoch=4.16e+6]Epoch 313/3000:  10%|█         | 313/3000 [02:17<18:59,  2.36it/s, v_num=1, train_loss_step=3.97e+6, 
train_loss_epoch=4.15e+6]Epoch 314/3000:  10%|█         | 313/3000 [02:17<18:59,  2.36it/s, v_num=1, train_loss_step=3.97e+6, train_loss_epoch=4.15e+6]Epoch 314/3000:  10%|█         | 314/3000 [02:18<19:38,  2.28it/s, v_num=1, train_loss_step=3.97e+6, train_loss_epoch=4.15e+6]Epoch 314/3000:  10%|█         | 314/3000 [02:18<19:38,  2.28it/s, v_num=1, train_loss_step=4.19e+6, train_loss_epoch=4.14e+6]Epoch 315/3000:  10%|█         | 314/3000 [02:18<19:38,  2.28it/s, v_num=1, train_loss_step=4.19e+6, train_loss_epoch=4.14e+6]Epoch 315/3000:  10%|█         | 315/3000 [02:18<18:58,  2.36it/s, v_num=1, train_loss_step=4.19e+6, train_loss_epoch=4.14e+6]Epoch 315/3000:  10%|█         | 315/3000 [02:18<18:58,  2.36it/s, v_num=1, train_loss_step=4.34e+6, train_loss_epoch=4.13e+6]Epoch 316/3000:  10%|█         | 315/3000 [02:18<18:58,  2.36it/s, v_num=1, train_loss_step=4.34e+6, train_loss_epoch=4.13e+6]Epoch 316/3000:  11%|█         | 316/3000 [02:19<20:32,  2.18it/s, v_num=1, train_loss_step=4.34e+6, train_loss_epoch=4.13e+6]Epoch 316/3000:  11%|█         | 316/3000 [02:19<20:32,  2.18it/s, v_num=1, train_loss_step=4.44e+6, train_loss_epoch=4.12e+6]Epoch 317/3000:  11%|█         | 316/3000 [02:19<20:32,  2.18it/s, v_num=1, train_loss_step=4.44e+6, train_loss_epoch=4.12e+6]Epoch 317/3000:  11%|█         | 317/3000 [02:19<21:08,  2.11it/s, v_num=1, train_loss_step=4.44e+6, train_loss_epoch=4.12e+6]Epoch 317/3000:  11%|█         | 317/3000 [02:19<21:08,  2.11it/s, v_num=1, train_loss_step=4.13e+6, train_loss_epoch=4.11e+6]Epoch 318/3000:  11%|█         | 317/3000 [02:19<21:08,  2.11it/s, v_num=1, train_loss_step=4.13e+6, train_loss_epoch=4.11e+6]Epoch 318/3000:  11%|█         | 318/3000 [02:19<20:42,  2.16it/s, v_num=1, train_loss_step=4.13e+6, train_loss_epoch=4.11e+6]Epoch 318/3000:  11%|█         | 318/3000 [02:19<20:42,  2.16it/s, v_num=1, train_loss_step=3.76e+6, train_loss_epoch=4.1e+6] Epoch 319/3000:  11%|█         | 318/3000 [02:19<20:42,  2.16it/s, v_num=1, 
train_loss_step=3.76e+6, train_loss_epoch=4.1e+6]Epoch 319/3000:  11%|█         | 319/3000 [02:20<20:13,  2.21it/s, v_num=1, train_loss_step=3.76e+6, train_loss_epoch=4.1e+6]Epoch 319/3000:  11%|█         | 319/3000 [02:20<20:13,  2.21it/s, v_num=1, train_loss_step=3.8e+6, train_loss_epoch=4.09e+6]Epoch 320/3000:  11%|█         | 319/3000 [02:20<20:13,  2.21it/s, v_num=1, train_loss_step=3.8e+6, train_loss_epoch=4.09e+6]Epoch 320/3000:  11%|█         | 320/3000 [02:20<20:28,  2.18it/s, v_num=1, train_loss_step=3.8e+6, train_loss_epoch=4.09e+6]Epoch 320/3000:  11%|█         | 320/3000 [02:20<20:28,  2.18it/s, v_num=1, train_loss_step=3.88e+6, train_loss_epoch=4.08e+6]Epoch 321/3000:  11%|█         | 320/3000 [02:20<20:28,  2.18it/s, v_num=1, train_loss_step=3.88e+6, train_loss_epoch=4.08e+6]Epoch 321/3000:  11%|█         | 321/3000 [02:21<20:46,  2.15it/s, v_num=1, train_loss_step=3.88e+6, train_loss_epoch=4.08e+6]Epoch 321/3000:  11%|█         | 321/3000 [02:21<20:46,  2.15it/s, v_num=1, train_loss_step=4.32e+6, train_loss_epoch=4.07e+6]Epoch 322/3000:  11%|█         | 321/3000 [02:21<20:46,  2.15it/s, v_num=1, train_loss_step=4.32e+6, train_loss_epoch=4.07e+6]Epoch 322/3000:  11%|█         | 322/3000 [02:21<20:04,  2.22it/s, v_num=1, train_loss_step=4.32e+6, train_loss_epoch=4.07e+6]Epoch 322/3000:  11%|█         | 322/3000 [02:21<20:04,  2.22it/s, v_num=1, train_loss_step=4.1e+6, train_loss_epoch=4.06e+6] Epoch 323/3000:  11%|█         | 322/3000 [02:21<20:04,  2.22it/s, v_num=1, train_loss_step=4.1e+6, train_loss_epoch=4.06e+6]Epoch 323/3000:  11%|█         | 323/3000 [02:22<19:09,  2.33it/s, v_num=1, train_loss_step=4.1e+6, train_loss_epoch=4.06e+6]Epoch 323/3000:  11%|█         | 323/3000 [02:22<19:09,  2.33it/s, v_num=1, train_loss_step=3.8e+6, train_loss_epoch=4.05e+6]Epoch 324/3000:  11%|█         | 323/3000 [02:22<19:09,  2.33it/s, v_num=1, train_loss_step=3.8e+6, train_loss_epoch=4.05e+6]Epoch 324/3000:  11%|█         | 324/3000 [02:22<18:27,  2.42it/s, 
v_num=1, train_loss_step=3.8e+6, train_loss_epoch=4.05e+6]Epoch 324/3000:  11%|█         | 324/3000 [02:22<18:27,  2.42it/s, v_num=1, train_loss_step=4.6e+6, train_loss_epoch=4.03e+6]Epoch 325/3000:  11%|█         | 324/3000 [02:22<18:27,  2.42it/s, v_num=1, train_loss_step=4.6e+6, train_loss_epoch=4.03e+6]Epoch 325/3000:  11%|█         | 325/3000 [02:23<19:49,  2.25it/s, v_num=1, train_loss_step=4.6e+6, train_loss_epoch=4.03e+6]Epoch 325/3000:  11%|█         | 325/3000 [02:23<19:49,  2.25it/s, v_num=1, train_loss_step=4.14e+6, train_loss_epoch=4.02e+6]Epoch 326/3000:  11%|█         | 325/3000 [02:23<19:49,  2.25it/s, v_num=1, train_loss_step=4.14e+6, train_loss_epoch=4.02e+6]Epoch 326/3000:  11%|█         | 326/3000 [02:23<19:58,  2.23it/s, v_num=1, train_loss_step=4.14e+6, train_loss_epoch=4.02e+6]Epoch 326/3000:  11%|█         | 326/3000 [02:23<19:58,  2.23it/s, v_num=1, train_loss_step=3.96e+6, train_loss_epoch=4.01e+6]Epoch 327/3000:  11%|█         | 326/3000 [02:23<19:58,  2.23it/s, v_num=1, train_loss_step=3.96e+6, train_loss_epoch=4.01e+6]Epoch 327/3000:  11%|█         | 327/3000 [02:23<20:06,  2.22it/s, v_num=1, train_loss_step=3.96e+6, train_loss_epoch=4.01e+6]Epoch 327/3000:  11%|█         | 327/3000 [02:23<20:06,  2.22it/s, v_num=1, train_loss_step=3.75e+6, train_loss_epoch=4e+6]   Epoch 328/3000:  11%|█         | 327/3000 [02:23<20:06,  2.22it/s, v_num=1, train_loss_step=3.75e+6, train_loss_epoch=4e+6]Epoch 328/3000:  11%|█         | 328/3000 [02:24<20:12,  2.20it/s, v_num=1, train_loss_step=3.75e+6, train_loss_epoch=4e+6]Epoch 328/3000:  11%|█         | 328/3000 [02:24<20:12,  2.20it/s, v_num=1, train_loss_step=3.97e+6, train_loss_epoch=3.99e+6]Epoch 329/3000:  11%|█         | 328/3000 [02:24<20:12,  2.20it/s, v_num=1, train_loss_step=3.97e+6, train_loss_epoch=3.99e+6]Epoch 329/3000:  11%|█         | 329/3000 [02:24<20:19,  2.19it/s, v_num=1, train_loss_step=3.97e+6, train_loss_epoch=3.99e+6]Epoch 329/3000:  11%|█         | 329/3000 [02:24<20:19,  
2.19it/s, v_num=1, train_loss_step=3.81e+6, train_loss_epoch=3.98e+6]Epoch 330/3000:  11%|█         | 329/3000 [02:24<20:19,  2.19it/s, v_num=1, train_loss_step=3.81e+6, train_loss_epoch=3.98e+6]Epoch 330/3000:  11%|█         | 330/3000 [02:25<19:56,  2.23it/s, v_num=1, train_loss_step=3.81e+6, train_loss_epoch=3.98e+6]Epoch 330/3000:  11%|█         | 330/3000 [02:25<19:56,  2.23it/s, v_num=1, train_loss_step=3.82e+6, train_loss_epoch=3.97e+6]Epoch 331/3000:  11%|█         | 330/3000 [02:25<19:56,  2.23it/s, v_num=1, train_loss_step=3.82e+6, train_loss_epoch=3.97e+6]Epoch 331/3000:  11%|█         | 331/3000 [02:25<20:11,  2.20it/s, v_num=1, train_loss_step=3.82e+6, train_loss_epoch=3.97e+6]Epoch 331/3000:  11%|█         | 331/3000 [02:25<20:11,  2.20it/s, v_num=1, train_loss_step=3.9e+6, train_loss_epoch=3.96e+6] Epoch 332/3000:  11%|█         | 331/3000 [02:25<20:11,  2.20it/s, v_num=1, train_loss_step=3.9e+6, train_loss_epoch=3.96e+6]Epoch 332/3000:  11%|█         | 332/3000 [02:26<19:56,  2.23it/s, v_num=1, train_loss_step=3.9e+6, train_loss_epoch=3.96e+6]Epoch 332/3000:  11%|█         | 332/3000 [02:26<19:56,  2.23it/s, v_num=1, train_loss_step=3.83e+6, train_loss_epoch=3.95e+6]Epoch 333/3000:  11%|█         | 332/3000 [02:26<19:56,  2.23it/s, v_num=1, train_loss_step=3.83e+6, train_loss_epoch=3.95e+6]Epoch 333/3000:  11%|█         | 333/3000 [02:26<19:36,  2.27it/s, v_num=1, train_loss_step=3.83e+6, train_loss_epoch=3.95e+6]Epoch 333/3000:  11%|█         | 333/3000 [02:26<19:36,  2.27it/s, v_num=1, train_loss_step=3.76e+6, train_loss_epoch=3.94e+6]Epoch 334/3000:  11%|█         | 333/3000 [02:26<19:36,  2.27it/s, v_num=1, train_loss_step=3.76e+6, train_loss_epoch=3.94e+6]Epoch 334/3000:  11%|█         | 334/3000 [02:26<18:40,  2.38it/s, v_num=1, train_loss_step=3.76e+6, train_loss_epoch=3.94e+6]Epoch 334/3000:  11%|█         | 334/3000 [02:26<18:40,  2.38it/s, v_num=1, train_loss_step=4.09e+6, train_loss_epoch=3.93e+6]Epoch 335/3000:  11%|█         | 334/3000 
[02:26<18:40,  2.38it/s, v_num=1, train_loss_step=4.09e+6, train_loss_epoch=3.93e+6]Epoch 335/3000:  11%|█         | 335/3000 [02:27<19:27,  2.28it/s, v_num=1, train_loss_step=4.09e+6, train_loss_epoch=3.93e+6]Epoch 335/3000:  11%|█         | 335/3000 [02:27<19:27,  2.28it/s, v_num=1, train_loss_step=3.93e+6, train_loss_epoch=3.92e+6]Epoch 336/3000:  11%|█         | 335/3000 [02:27<19:27,  2.28it/s, v_num=1, train_loss_step=3.93e+6, train_loss_epoch=3.92e+6]Epoch 336/3000:  11%|█         | 336/3000 [02:27<18:33,  2.39it/s, v_num=1, train_loss_step=3.93e+6, train_loss_epoch=3.92e+6]Epoch 336/3000:  11%|█         | 336/3000 [02:27<18:33,  2.39it/s, v_num=1, train_loss_step=3.75e+6, train_loss_epoch=3.91e+6]Epoch 337/3000:  11%|█         | 336/3000 [02:27<18:33,  2.39it/s, v_num=1, train_loss_step=3.75e+6, train_loss_epoch=3.91e+6]Epoch 337/3000:  11%|█         | 337/3000 [02:28<19:16,  2.30it/s, v_num=1, train_loss_step=3.75e+6, train_loss_epoch=3.91e+6]Epoch 337/3000:  11%|█         | 337/3000 [02:28<19:16,  2.30it/s, v_num=1, train_loss_step=3.93e+6, train_loss_epoch=3.9e+6] Epoch 338/3000:  11%|█         | 337/3000 [02:28<19:16,  2.30it/s, v_num=1, train_loss_step=3.93e+6, train_loss_epoch=3.9e+6]Epoch 338/3000:  11%|█▏        | 338/3000 [02:28<19:01,  2.33it/s, v_num=1, train_loss_step=3.93e+6, train_loss_epoch=3.9e+6]Epoch 338/3000:  11%|█▏        | 338/3000 [02:28<19:01,  2.33it/s, v_num=1, train_loss_step=3.76e+6, train_loss_epoch=3.89e+6]Epoch 339/3000:  11%|█▏        | 338/3000 [02:28<19:01,  2.33it/s, v_num=1, train_loss_step=3.76e+6, train_loss_epoch=3.89e+6]Epoch 339/3000:  11%|█▏        | 339/3000 [02:29<18:29,  2.40it/s, v_num=1, train_loss_step=3.76e+6, train_loss_epoch=3.89e+6]Epoch 339/3000:  11%|█▏        | 339/3000 [02:29<18:29,  2.40it/s, v_num=1, train_loss_step=3.94e+6, train_loss_epoch=3.88e+6]Epoch 340/3000:  11%|█▏        | 339/3000 [02:29<18:29,  2.40it/s, v_num=1, train_loss_step=3.94e+6, train_loss_epoch=3.88e+6]Epoch 340/3000:  11%|█▏     
   | 340/3000 [02:29<17:06,  2.59it/s, v_num=1, train_loss_step=3.94e+6, train_loss_epoch=3.88e+6]Epoch 340/3000:  11%|█▏        | 340/3000 [02:29<17:06,  2.59it/s, v_num=1, train_loss_step=4e+6, train_loss_epoch=3.87e+6]   Epoch 341/3000:  11%|█▏        | 340/3000 [02:29<17:06,  2.59it/s, v_num=1, train_loss_step=4e+6, train_loss_epoch=3.87e+6]Epoch 341/3000:  11%|█▏        | 341/3000 [02:29<18:00,  2.46it/s, v_num=1, train_loss_step=4e+6, train_loss_epoch=3.87e+6]Epoch 341/3000:  11%|█▏        | 341/3000 [02:29<18:00,  2.46it/s, v_num=1, train_loss_step=3.97e+6, train_loss_epoch=3.87e+6]Epoch 342/3000:  11%|█▏        | 341/3000 [02:29<18:00,  2.46it/s, v_num=1, train_loss_step=3.97e+6, train_loss_epoch=3.87e+6]Epoch 342/3000:  11%|█▏        | 342/3000 [02:30<17:46,  2.49it/s, v_num=1, train_loss_step=3.97e+6, train_loss_epoch=3.87e+6]Epoch 342/3000:  11%|█▏        | 342/3000 [02:30<17:46,  2.49it/s, v_num=1, train_loss_step=3.68e+6, train_loss_epoch=3.86e+6]Epoch 343/3000:  11%|█▏        | 342/3000 [02:30<17:46,  2.49it/s, v_num=1, train_loss_step=3.68e+6, train_loss_epoch=3.86e+6]Epoch 343/3000:  11%|█▏        | 343/3000 [02:30<18:51,  2.35it/s, v_num=1, train_loss_step=3.68e+6, train_loss_epoch=3.86e+6]Epoch 343/3000:  11%|█▏        | 343/3000 [02:30<18:51,  2.35it/s, v_num=1, train_loss_step=3.29e+6, train_loss_epoch=3.85e+6]Epoch 344/3000:  11%|█▏        | 343/3000 [02:30<18:51,  2.35it/s, v_num=1, train_loss_step=3.29e+6, train_loss_epoch=3.85e+6]Epoch 344/3000:  11%|█▏        | 344/3000 [02:31<18:32,  2.39it/s, v_num=1, train_loss_step=3.29e+6, train_loss_epoch=3.85e+6]Epoch 344/3000:  11%|█▏        | 344/3000 [02:31<18:32,  2.39it/s, v_num=1, train_loss_step=4.05e+6, train_loss_epoch=3.84e+6]Epoch 345/3000:  11%|█▏        | 344/3000 [02:31<18:32,  2.39it/s, v_num=1, train_loss_step=4.05e+6, train_loss_epoch=3.84e+6]Epoch 345/3000:  12%|█▏        | 345/3000 [02:31<19:11,  2.30it/s, v_num=1, train_loss_step=4.05e+6, train_loss_epoch=3.84e+6]Epoch 345/3000:  
12%|█▏        | 345/3000 [02:31<19:11,  2.30it/s, v_num=1, train_loss_step=4.15e+6, train_loss_epoch=3.83e+6]Epoch 346/3000:  12%|█▏        | 345/3000 [02:31<19:11,  2.30it/s, v_num=1, train_loss_step=4.15e+6, train_loss_epoch=3.83e+6]Epoch 346/3000:  12%|█▏        | 346/3000 [02:32<18:52,  2.34it/s, v_num=1, train_loss_step=4.15e+6, train_loss_epoch=3.83e+6]Epoch 346/3000:  12%|█▏        | 346/3000 [02:32<18:52,  2.34it/s, v_num=1, train_loss_step=3.94e+6, train_loss_epoch=3.82e+6]Epoch 347/3000:  12%|█▏        | 346/3000 [02:32<18:52,  2.34it/s, v_num=1, train_loss_step=3.94e+6, train_loss_epoch=3.82e+6]Epoch 347/3000:  12%|█▏        | 347/3000 [02:32<19:42,  2.24it/s, v_num=1, train_loss_step=3.94e+6, train_loss_epoch=3.82e+6]Epoch 347/3000:  12%|█▏        | 347/3000 [02:32<19:42,  2.24it/s, v_num=1, train_loss_step=3.7e+6, train_loss_epoch=3.81e+6] Epoch 348/3000:  12%|█▏        | 347/3000 [02:32<19:42,  2.24it/s, v_num=1, train_loss_step=3.7e+6, train_loss_epoch=3.81e+6]Epoch 348/3000:  12%|█▏        | 348/3000 [02:32<19:51,  2.23it/s, v_num=1, train_loss_step=3.7e+6, train_loss_epoch=3.81e+6]Epoch 348/3000:  12%|█▏        | 348/3000 [02:32<19:51,  2.23it/s, v_num=1, train_loss_step=3.65e+6, train_loss_epoch=3.8e+6]Epoch 349/3000:  12%|█▏        | 348/3000 [02:32<19:51,  2.23it/s, v_num=1, train_loss_step=3.65e+6, train_loss_epoch=3.8e+6]Epoch 349/3000:  12%|█▏        | 349/3000 [02:33<18:55,  2.33it/s, v_num=1, train_loss_step=3.65e+6, train_loss_epoch=3.8e+6]Epoch 349/3000:  12%|█▏        | 349/3000 [02:33<18:55,  2.33it/s, v_num=1, train_loss_step=4.14e+6, train_loss_epoch=3.79e+6]Epoch 350/3000:  12%|█▏        | 349/3000 [02:33<18:55,  2.33it/s, v_num=1, train_loss_step=4.14e+6, train_loss_epoch=3.79e+6]Epoch 350/3000:  12%|█▏        | 350/3000 [02:33<19:40,  2.24it/s, v_num=1, train_loss_step=4.14e+6, train_loss_epoch=3.79e+6]Epoch 350/3000:  12%|█▏        | 350/3000 [02:33<19:40,  2.24it/s, v_num=1, train_loss_step=3.93e+6, train_loss_epoch=3.78e+6]Epoch 
351/3000:  12%|█▏        | 350/3000 [02:33<19:40,  2.24it/s, v_num=1, train_loss_step=3.93e+6, train_loss_epoch=3.78e+6]Epoch 351/3000:  12%|█▏        | 351/3000 [02:34<19:00,  2.32it/s, v_num=1, train_loss_step=3.93e+6, train_loss_epoch=3.78e+6]Epoch 351/3000:  12%|█▏        | 351/3000 [02:34<19:00,  2.32it/s, v_num=1, train_loss_step=4.02e+6, train_loss_epoch=3.77e+6]Epoch 352/3000:  12%|█▏        | 351/3000 [02:34<19:00,  2.32it/s, v_num=1, train_loss_step=4.02e+6, train_loss_epoch=3.77e+6]Epoch 352/3000:  12%|█▏        | 352/3000 [02:34<19:22,  2.28it/s, v_num=1, train_loss_step=4.02e+6, train_loss_epoch=3.77e+6]Epoch 352/3000:  12%|█▏        | 352/3000 [02:34<19:22,  2.28it/s, v_num=1, train_loss_step=4e+6, train_loss_epoch=3.76e+6]   Epoch 353/3000:  12%|█▏        | 352/3000 [02:34<19:22,  2.28it/s, v_num=1, train_loss_step=4e+6, train_loss_epoch=3.76e+6]Epoch 353/3000:  12%|█▏        | 353/3000 [02:35<19:02,  2.32it/s, v_num=1, train_loss_step=4e+6, train_loss_epoch=3.76e+6]Epoch 353/3000:  12%|█▏        | 353/3000 [02:35<19:02,  2.32it/s, v_num=1, train_loss_step=3.77e+6, train_loss_epoch=3.75e+6]Epoch 354/3000:  12%|█▏        | 353/3000 [02:35<19:02,  2.32it/s, v_num=1, train_loss_step=3.77e+6, train_loss_epoch=3.75e+6]Epoch 354/3000:  12%|█▏        | 354/3000 [02:35<18:03,  2.44it/s, v_num=1, train_loss_step=3.77e+6, train_loss_epoch=3.75e+6]Epoch 354/3000:  12%|█▏        | 354/3000 [02:35<18:03,  2.44it/s, v_num=1, train_loss_step=3.74e+6, train_loss_epoch=3.74e+6]Epoch 355/3000:  12%|█▏        | 354/3000 [02:35<18:03,  2.44it/s, v_num=1, train_loss_step=3.74e+6, train_loss_epoch=3.74e+6]Epoch 355/3000:  12%|█▏        | 355/3000 [02:35<18:13,  2.42it/s, v_num=1, train_loss_step=3.74e+6, train_loss_epoch=3.74e+6]Epoch 355/3000:  12%|█▏        | 355/3000 [02:35<18:13,  2.42it/s, v_num=1, train_loss_step=3.38e+6, train_loss_epoch=3.74e+6]Epoch 356/3000:  12%|█▏        | 355/3000 [02:35<18:13,  2.42it/s, v_num=1, train_loss_step=3.38e+6, 
train_loss_epoch=3.74e+6]Epoch 356/3000:  12%|█▏        | 356/3000 [02:36<18:08,  2.43it/s, v_num=1, train_loss_step=3.38e+6, train_loss_epoch=3.74e+6]Epoch 356/3000:  12%|█▏        | 356/3000 [02:36<18:08,  2.43it/s, v_num=1, train_loss_step=3.77e+6, train_loss_epoch=3.73e+6]Epoch 357/3000:  12%|█▏        | 356/3000 [02:36<18:08,  2.43it/s, v_num=1, train_loss_step=3.77e+6, train_loss_epoch=3.73e+6]Epoch 357/3000:  12%|█▏        | 357/3000 [02:36<18:12,  2.42it/s, v_num=1, train_loss_step=3.77e+6, train_loss_epoch=3.73e+6]Epoch 357/3000:  12%|█▏        | 357/3000 [02:36<18:12,  2.42it/s, v_num=1, train_loss_step=3.61e+6, train_loss_epoch=3.72e+6]Epoch 358/3000:  12%|█▏        | 357/3000 [02:36<18:12,  2.42it/s, v_num=1, train_loss_step=3.61e+6, train_loss_epoch=3.72e+6]Epoch 358/3000:  12%|█▏        | 358/3000 [02:37<17:30,  2.51it/s, v_num=1, train_loss_step=3.61e+6, train_loss_epoch=3.72e+6]Epoch 358/3000:  12%|█▏        | 358/3000 [02:37<17:30,  2.51it/s, v_num=1, train_loss_step=3.88e+6, train_loss_epoch=3.71e+6]Epoch 359/3000:  12%|█▏        | 358/3000 [02:37<17:30,  2.51it/s, v_num=1, train_loss_step=3.88e+6, train_loss_epoch=3.71e+6]Epoch 359/3000:  12%|█▏        | 359/3000 [02:37<18:06,  2.43it/s, v_num=1, train_loss_step=3.88e+6, train_loss_epoch=3.71e+6]Epoch 359/3000:  12%|█▏        | 359/3000 [02:37<18:06,  2.43it/s, v_num=1, train_loss_step=3.9e+6, train_loss_epoch=3.7e+6]  Epoch 360/3000:  12%|█▏        | 359/3000 [02:37<18:06,  2.43it/s, v_num=1, train_loss_step=3.9e+6, train_loss_epoch=3.7e+6]Epoch 360/3000:  12%|█▏        | 360/3000 [02:37<17:47,  2.47it/s, v_num=1, train_loss_step=3.9e+6, train_loss_epoch=3.7e+6]Epoch 360/3000:  12%|█▏        | 360/3000 [02:37<17:47,  2.47it/s, v_num=1, train_loss_step=3.77e+6, train_loss_epoch=3.69e+6]Epoch 361/3000:  12%|█▏        | 360/3000 [02:37<17:47,  2.47it/s, v_num=1, train_loss_step=3.77e+6, train_loss_epoch=3.69e+6]Epoch 361/3000:  12%|█▏        | 361/3000 [02:38<18:58,  2.32it/s, v_num=1, 
train_loss_step=3.77e+6, train_loss_epoch=3.69e+6]Epoch 361/3000:  12%|█▏        | 361/3000 [02:38<18:58,  2.32it/s, v_num=1, train_loss_step=3.82e+6, train_loss_epoch=3.68e+6]Epoch 362/3000:  12%|█▏        | 361/3000 [02:38<18:58,  2.32it/s, v_num=1, train_loss_step=3.82e+6, train_loss_epoch=3.68e+6]Epoch 362/3000:  12%|█▏        | 362/3000 [02:38<18:15,  2.41it/s, v_num=1, train_loss_step=3.82e+6, train_loss_epoch=3.68e+6]Epoch 362/3000:  12%|█▏        | 362/3000 [02:38<18:15,  2.41it/s, v_num=1, train_loss_step=3.88e+6, train_loss_epoch=3.68e+6]Epoch 363/3000:  12%|█▏        | 362/3000 [02:38<18:15,  2.41it/s, v_num=1, train_loss_step=3.88e+6, train_loss_epoch=3.68e+6]Epoch 363/3000:  12%|█▏        | 363/3000 [02:39<18:25,  2.39it/s, v_num=1, train_loss_step=3.88e+6, train_loss_epoch=3.68e+6]Epoch 363/3000:  12%|█▏        | 363/3000 [02:39<18:25,  2.39it/s, v_num=1, train_loss_step=3.55e+6, train_loss_epoch=3.67e+6]Epoch 364/3000:  12%|█▏        | 363/3000 [02:39<18:25,  2.39it/s, v_num=1, train_loss_step=3.55e+6, train_loss_epoch=3.67e+6]Epoch 364/3000:  12%|█▏        | 364/3000 [02:39<18:15,  2.41it/s, v_num=1, train_loss_step=3.55e+6, train_loss_epoch=3.67e+6]Epoch 364/3000:  12%|█▏        | 364/3000 [02:39<18:15,  2.41it/s, v_num=1, train_loss_step=3.68e+6, train_loss_epoch=3.66e+6]Epoch 365/3000:  12%|█▏        | 364/3000 [02:39<18:15,  2.41it/s, v_num=1, train_loss_step=3.68e+6, train_loss_epoch=3.66e+6]Epoch 365/3000:  12%|█▏        | 365/3000 [02:40<18:29,  2.38it/s, v_num=1, train_loss_step=3.68e+6, train_loss_epoch=3.66e+6]Epoch 365/3000:  12%|█▏        | 365/3000 [02:40<18:29,  2.38it/s, v_num=1, train_loss_step=3.67e+6, train_loss_epoch=3.65e+6]Epoch 366/3000:  12%|█▏        | 365/3000 [02:40<18:29,  2.38it/s, v_num=1, train_loss_step=3.67e+6, train_loss_epoch=3.65e+6]Epoch 366/3000:  12%|█▏        | 366/3000 [02:40<18:42,  2.35it/s, v_num=1, train_loss_step=3.67e+6, train_loss_epoch=3.65e+6]Epoch 366/3000:  12%|█▏        | 366/3000 [02:40<18:42,  
2.35it/s, v_num=1, train_loss_step=3.51e+6, train_loss_epoch=3.64e+6]Epoch 367/3000:  12%|█▏        | 366/3000 [02:40<18:42,  2.35it/s, v_num=1, train_loss_step=3.51e+6, train_loss_epoch=3.64e+6]Epoch 367/3000:  12%|█▏        | 367/3000 [02:40<18:38,  2.35it/s, v_num=1, train_loss_step=3.51e+6, train_loss_epoch=3.64e+6]Epoch 367/3000:  12%|█▏        | 367/3000 [02:40<18:38,  2.35it/s, v_num=1, train_loss_step=3.58e+6, train_loss_epoch=3.63e+6]Epoch 368/3000:  12%|█▏        | 367/3000 [02:40<18:38,  2.35it/s, v_num=1, train_loss_step=3.58e+6, train_loss_epoch=3.63e+6]Epoch 368/3000:  12%|█▏        | 368/3000 [02:41<19:05,  2.30it/s, v_num=1, train_loss_step=3.58e+6, train_loss_epoch=3.63e+6]Epoch 368/3000:  12%|█▏        | 368/3000 [02:41<19:05,  2.30it/s, v_num=1, train_loss_step=3.53e+6, train_loss_epoch=3.62e+6]Epoch 369/3000:  12%|█▏        | 368/3000 [02:41<19:05,  2.30it/s, v_num=1, train_loss_step=3.53e+6, train_loss_epoch=3.62e+6]Epoch 369/3000:  12%|█▏        | 369/3000 [02:41<19:00,  2.31it/s, v_num=1, train_loss_step=3.53e+6, train_loss_epoch=3.62e+6]Epoch 369/3000:  12%|█▏        | 369/3000 [02:41<19:00,  2.31it/s, v_num=1, train_loss_step=3.73e+6, train_loss_epoch=3.62e+6]Epoch 370/3000:  12%|█▏        | 369/3000 [02:41<19:00,  2.31it/s, v_num=1, train_loss_step=3.73e+6, train_loss_epoch=3.62e+6]Epoch 370/3000:  12%|█▏        | 370/3000 [02:42<19:08,  2.29it/s, v_num=1, train_loss_step=3.73e+6, train_loss_epoch=3.62e+6]Epoch 370/3000:  12%|█▏        | 370/3000 [02:42<19:08,  2.29it/s, v_num=1, train_loss_step=3.44e+6, train_loss_epoch=3.61e+6]Epoch 371/3000:  12%|█▏        | 370/3000 [02:42<19:08,  2.29it/s, v_num=1, train_loss_step=3.44e+6, train_loss_epoch=3.61e+6]Epoch 371/3000:  12%|█▏        | 371/3000 [02:42<19:17,  2.27it/s, v_num=1, train_loss_step=3.44e+6, train_loss_epoch=3.61e+6]Epoch 371/3000:  12%|█▏        | 371/3000 [02:42<19:17,  2.27it/s, v_num=1, train_loss_step=3.51e+6, train_loss_epoch=3.6e+6] Epoch 372/3000:  12%|█▏        | 
371/3000 [02:42<19:17,  2.27it/s, v_num=1, train_loss_step=3.51e+6, train_loss_epoch=3.6e+6]Epoch 372/3000:  12%|█▏        | 372/3000 [02:43<19:24,  2.26it/s, v_num=1, train_loss_step=3.51e+6, train_loss_epoch=3.6e+6]Epoch 372/3000:  12%|█▏        | 372/3000 [02:43<19:24,  2.26it/s, v_num=1, train_loss_step=3.26e+6, train_loss_epoch=3.59e+6]Epoch 373/3000:  12%|█▏        | 372/3000 [02:43<19:24,  2.26it/s, v_num=1, train_loss_step=3.26e+6, train_loss_epoch=3.59e+6]Epoch 373/3000:  12%|█▏        | 373/3000 [02:43<19:49,  2.21it/s, v_num=1, train_loss_step=3.26e+6, train_loss_epoch=3.59e+6]Epoch 373/3000:  12%|█▏        | 373/3000 [02:43<19:49,  2.21it/s, v_num=1, train_loss_step=3.66e+6, train_loss_epoch=3.58e+6]Epoch 374/3000:  12%|█▏        | 373/3000 [02:43<19:49,  2.21it/s, v_num=1, train_loss_step=3.66e+6, train_loss_epoch=3.58e+6]Epoch 374/3000:  12%|█▏        | 374/3000 [02:44<18:57,  2.31it/s, v_num=1, train_loss_step=3.66e+6, train_loss_epoch=3.58e+6]Epoch 374/3000:  12%|█▏        | 374/3000 [02:44<18:57,  2.31it/s, v_num=1, train_loss_step=3.74e+6, train_loss_epoch=3.58e+6]Epoch 375/3000:  12%|█▏        | 374/3000 [02:44<18:57,  2.31it/s, v_num=1, train_loss_step=3.74e+6, train_loss_epoch=3.58e+6]Epoch 375/3000:  12%|█▎        | 375/3000 [02:44<19:32,  2.24it/s, v_num=1, train_loss_step=3.74e+6, train_loss_epoch=3.58e+6]Epoch 375/3000:  12%|█▎        | 375/3000 [02:44<19:32,  2.24it/s, v_num=1, train_loss_step=3.52e+6, train_loss_epoch=3.57e+6]Epoch 376/3000:  12%|█▎        | 375/3000 [02:44<19:32,  2.24it/s, v_num=1, train_loss_step=3.52e+6, train_loss_epoch=3.57e+6]Epoch 376/3000:  13%|█▎        | 376/3000 [02:44<19:34,  2.24it/s, v_num=1, train_loss_step=3.52e+6, train_loss_epoch=3.57e+6]Epoch 376/3000:  13%|█▎        | 376/3000 [02:44<19:34,  2.24it/s, v_num=1, train_loss_step=3.51e+6, train_loss_epoch=3.56e+6]Epoch 377/3000:  13%|█▎        | 376/3000 [02:44<19:34,  2.24it/s, v_num=1, train_loss_step=3.51e+6, train_loss_epoch=3.56e+6]Epoch 377/3000:  
13%|█▎        | 377/3000 [02:45<18:22,  2.38it/s, v_num=1, train_loss_step=3.51e+6, train_loss_epoch=3.56e+6]Epoch 377/3000:  13%|█▎        | 377/3000 [02:45<18:22,  2.38it/s, v_num=1, train_loss_step=3.71e+6, train_loss_epoch=3.55e+6]Epoch 378/3000:  13%|█▎        | 377/3000 [02:45<18:22,  2.38it/s, v_num=1, train_loss_step=3.71e+6, train_loss_epoch=3.55e+6]Epoch 378/3000:  13%|█▎        | 378/3000 [02:45<19:06,  2.29it/s, v_num=1, train_loss_step=3.71e+6, train_loss_epoch=3.55e+6]Epoch 378/3000:  13%|█▎        | 378/3000 [02:45<19:06,  2.29it/s, v_num=1, train_loss_step=3.23e+6, train_loss_epoch=3.54e+6]Epoch 379/3000:  13%|█▎        | 378/3000 [02:45<19:06,  2.29it/s, v_num=1, train_loss_step=3.23e+6, train_loss_epoch=3.54e+6]Epoch 379/3000:  13%|█▎        | 379/3000 [02:46<18:38,  2.34it/s, v_num=1, train_loss_step=3.23e+6, train_loss_epoch=3.54e+6]Epoch 379/3000:  13%|█▎        | 379/3000 [02:46<18:38,  2.34it/s, v_num=1, train_loss_step=3.6e+6, train_loss_epoch=3.54e+6] Epoch 380/3000:  13%|█▎        | 379/3000 [02:46<18:38,  2.34it/s, v_num=1, train_loss_step=3.6e+6, train_loss_epoch=3.54e+6]Epoch 380/3000:  13%|█▎        | 380/3000 [02:46<17:30,  2.49it/s, v_num=1, train_loss_step=3.6e+6, train_loss_epoch=3.54e+6]Epoch 380/3000:  13%|█▎        | 380/3000 [02:46<17:30,  2.49it/s, v_num=1, train_loss_step=3.48e+6, train_loss_epoch=3.53e+6]Epoch 381/3000:  13%|█▎        | 380/3000 [02:46<17:30,  2.49it/s, v_num=1, train_loss_step=3.48e+6, train_loss_epoch=3.53e+6]Epoch 381/3000:  13%|█▎        | 381/3000 [02:46<17:43,  2.46it/s, v_num=1, train_loss_step=3.48e+6, train_loss_epoch=3.53e+6]Epoch 381/3000:  13%|█▎        | 381/3000 [02:46<17:43,  2.46it/s, v_num=1, train_loss_step=3.92e+6, train_loss_epoch=3.52e+6]Epoch 382/3000:  13%|█▎        | 381/3000 [02:46<17:43,  2.46it/s, v_num=1, train_loss_step=3.92e+6, train_loss_epoch=3.52e+6]Epoch 382/3000:  13%|█▎        | 382/3000 [02:47<18:10,  2.40it/s, v_num=1, train_loss_step=3.92e+6, 
train_loss_epoch=3.52e+6]Epoch 382/3000:  13%|█▎        | 382/3000 [02:47<18:10,  2.40it/s, v_num=1, train_loss_step=3.65e+6, train_loss_epoch=3.51e+6]Epoch 383/3000:  13%|█▎        | 382/3000 [02:47<18:10,  2.40it/s, v_num=1, train_loss_step=3.65e+6, train_loss_epoch=3.51e+6]Epoch 383/3000:  13%|█▎        | 383/3000 [02:47<18:40,  2.34it/s, v_num=1, train_loss_step=3.65e+6, train_loss_epoch=3.51e+6]Epoch 383/3000:  13%|█▎        | 383/3000 [02:47<18:40,  2.34it/s, v_num=1, train_loss_step=3.43e+6, train_loss_epoch=3.51e+6]Epoch 384/3000:  13%|█▎        | 383/3000 [02:47<18:40,  2.34it/s, v_num=1, train_loss_step=3.43e+6, train_loss_epoch=3.51e+6]Epoch 384/3000:  13%|█▎        | 384/3000 [02:48<18:41,  2.33it/s, v_num=1, train_loss_step=3.43e+6, train_loss_epoch=3.51e+6]Epoch 384/3000:  13%|█▎        | 384/3000 [02:48<18:41,  2.33it/s, v_num=1, train_loss_step=3.81e+6, train_loss_epoch=3.5e+6] Epoch 385/3000:  13%|█▎        | 384/3000 [02:48<18:41,  2.33it/s, v_num=1, train_loss_step=3.81e+6, train_loss_epoch=3.5e+6]Epoch 385/3000:  13%|█▎        | 385/3000 [02:48<18:22,  2.37it/s, v_num=1, train_loss_step=3.81e+6, train_loss_epoch=3.5e+6]Epoch 385/3000:  13%|█▎        | 385/3000 [02:48<18:22,  2.37it/s, v_num=1, train_loss_step=3.36e+6, train_loss_epoch=3.49e+6]Epoch 386/3000:  13%|█▎        | 385/3000 [02:48<18:22,  2.37it/s, v_num=1, train_loss_step=3.36e+6, train_loss_epoch=3.49e+6]Epoch 386/3000:  13%|█▎        | 386/3000 [02:49<17:19,  2.52it/s, v_num=1, train_loss_step=3.36e+6, train_loss_epoch=3.49e+6]Epoch 386/3000:  13%|█▎        | 386/3000 [02:49<17:19,  2.52it/s, v_num=1, train_loss_step=3.48e+6, train_loss_epoch=3.48e+6]Epoch 387/3000:  13%|█▎        | 386/3000 [02:49<17:19,  2.52it/s, v_num=1, train_loss_step=3.48e+6, train_loss_epoch=3.48e+6]Epoch 387/3000:  13%|█▎        | 387/3000 [02:49<16:51,  2.58it/s, v_num=1, train_loss_step=3.48e+6, train_loss_epoch=3.48e+6]Epoch 387/3000:  13%|█▎        | 387/3000 [02:49<16:51,  2.58it/s, v_num=1, 
train_loss_step=3.52e+6, train_loss_epoch=3.47e+6]Epoch 388/3000:  13%|█▎        | 387/3000 [02:49<16:51,  2.58it/s, v_num=1, train_loss_step=3.52e+6, train_loss_epoch=3.47e+6]Epoch 388/3000:  13%|█▎        | 388/3000 [02:49<18:25,  2.36it/s, v_num=1, train_loss_step=3.52e+6, train_loss_epoch=3.47e+6]Epoch 388/3000:  13%|█▎        | 388/3000 [02:49<18:25,  2.36it/s, v_num=1, train_loss_step=3.49e+6, train_loss_epoch=3.47e+6]Epoch 389/3000:  13%|█▎        | 388/3000 [02:49<18:25,  2.36it/s, v_num=1, train_loss_step=3.49e+6, train_loss_epoch=3.47e+6]Epoch 389/3000:  13%|█▎        | 389/3000 [02:50<19:13,  2.26it/s, v_num=1, train_loss_step=3.49e+6, train_loss_epoch=3.47e+6]Epoch 389/3000:  13%|█▎        | 389/3000 [02:50<19:13,  2.26it/s, v_num=1, train_loss_step=3.82e+6, train_loss_epoch=3.46e+6]Epoch 390/3000:  13%|█▎        | 389/3000 [02:50<19:13,  2.26it/s, v_num=1, train_loss_step=3.82e+6, train_loss_epoch=3.46e+6]Epoch 390/3000:  13%|█▎        | 390/3000 [02:50<18:58,  2.29it/s, v_num=1, train_loss_step=3.82e+6, train_loss_epoch=3.46e+6]Epoch 390/3000:  13%|█▎        | 390/3000 [02:50<18:58,  2.29it/s, v_num=1, train_loss_step=3.22e+6, train_loss_epoch=3.45e+6]Epoch 391/3000:  13%|█▎        | 390/3000 [02:50<18:58,  2.29it/s, v_num=1, train_loss_step=3.22e+6, train_loss_epoch=3.45e+6]Epoch 391/3000:  13%|█▎        | 391/3000 [02:51<19:10,  2.27it/s, v_num=1, train_loss_step=3.22e+6, train_loss_epoch=3.45e+6]Epoch 391/3000:  13%|█▎        | 391/3000 [02:51<19:10,  2.27it/s, v_num=1, train_loss_step=3.52e+6, train_loss_epoch=3.44e+6]Epoch 392/3000:  13%|█▎        | 391/3000 [02:51<19:10,  2.27it/s, v_num=1, train_loss_step=3.52e+6, train_loss_epoch=3.44e+6]Epoch 392/3000:  13%|█▎        | 392/3000 [02:51<18:21,  2.37it/s, v_num=1, train_loss_step=3.52e+6, train_loss_epoch=3.44e+6]Epoch 392/3000:  13%|█▎        | 392/3000 [02:51<18:21,  2.37it/s, v_num=1, train_loss_step=3.52e+6, train_loss_epoch=3.44e+6]Epoch 393/3000:  13%|█▎        | 392/3000 [02:51<18:21,  
2.37it/s, v_num=1, train_loss_step=3.52e+6, train_loss_epoch=3.44e+6]Epoch 393/3000:  13%|█▎        | 393/3000 [02:51<17:42,  2.45it/s, v_num=1, train_loss_step=3.52e+6, train_loss_epoch=3.44e+6]Epoch 393/3000:  13%|█▎        | 393/3000 [02:51<17:42,  2.45it/s, v_num=1, train_loss_step=3.34e+6, train_loss_epoch=3.43e+6]Epoch 394/3000:  13%|█▎        | 393/3000 [02:51<17:42,  2.45it/s, v_num=1, train_loss_step=3.34e+6, train_loss_epoch=3.43e+6]Epoch 394/3000:  13%|█▎        | 394/3000 [02:52<18:12,  2.39it/s, v_num=1, train_loss_step=3.34e+6, train_loss_epoch=3.43e+6]Epoch 394/3000:  13%|█▎        | 394/3000 [02:52<18:12,  2.39it/s, v_num=1, train_loss_step=3.16e+6, train_loss_epoch=3.42e+6]Epoch 395/3000:  13%|█▎        | 394/3000 [02:52<18:12,  2.39it/s, v_num=1, train_loss_step=3.16e+6, train_loss_epoch=3.42e+6]Epoch 395/3000:  13%|█▎        | 395/3000 [02:52<18:17,  2.37it/s, v_num=1, train_loss_step=3.16e+6, train_loss_epoch=3.42e+6]Epoch 395/3000:  13%|█▎        | 395/3000 [02:52<18:17,  2.37it/s, v_num=1, train_loss_step=3.06e+6, train_loss_epoch=3.42e+6]Epoch 396/3000:  13%|█▎        | 395/3000 [02:52<18:17,  2.37it/s, v_num=1, train_loss_step=3.06e+6, train_loss_epoch=3.42e+6]Epoch 396/3000:  13%|█▎        | 396/3000 [02:53<17:32,  2.47it/s, v_num=1, train_loss_step=3.06e+6, train_loss_epoch=3.42e+6]Epoch 396/3000:  13%|█▎        | 396/3000 [02:53<17:32,  2.47it/s, v_num=1, train_loss_step=3.32e+6, train_loss_epoch=3.41e+6]Epoch 397/3000:  13%|█▎        | 396/3000 [02:53<17:32,  2.47it/s, v_num=1, train_loss_step=3.32e+6, train_loss_epoch=3.41e+6]Epoch 397/3000:  13%|█▎        | 397/3000 [02:53<16:48,  2.58it/s, v_num=1, train_loss_step=3.32e+6, train_loss_epoch=3.41e+6]Epoch 397/3000:  13%|█▎        | 397/3000 [02:53<16:48,  2.58it/s, v_num=1, train_loss_step=3.33e+6, train_loss_epoch=3.4e+6] Epoch 398/3000:  13%|█▎        | 397/3000 [02:53<16:48,  2.58it/s, v_num=1, train_loss_step=3.33e+6, train_loss_epoch=3.4e+6]Epoch 398/3000:  13%|█▎        | 398/3000 
[02:53<16:57,  2.56it/s, v_num=1, train_loss_step=3.33e+6, train_loss_epoch=3.4e+6]Epoch 398/3000:  13%|█▎        | 398/3000 [02:53<16:57,  2.56it/s, v_num=1, train_loss_step=3.41e+6, train_loss_epoch=3.39e+6]Epoch 399/3000:  13%|█▎        | 398/3000 [02:53<16:57,  2.56it/s, v_num=1, train_loss_step=3.41e+6, train_loss_epoch=3.39e+6]Epoch 399/3000:  13%|█▎        | 399/3000 [02:54<17:33,  2.47it/s, v_num=1, train_loss_step=3.41e+6, train_loss_epoch=3.39e+6]Epoch 399/3000:  13%|█▎        | 399/3000 [02:54<17:33,  2.47it/s, v_num=1, train_loss_step=3.31e+6, train_loss_epoch=3.39e+6]Epoch 400/3000:  13%|█▎        | 399/3000 [02:54<17:33,  2.47it/s, v_num=1, train_loss_step=3.31e+6, train_loss_epoch=3.39e+6]Epoch 400/3000:  13%|█▎        | 400/3000 [02:54<18:05,  2.40it/s, v_num=1, train_loss_step=3.31e+6, train_loss_epoch=3.39e+6]Epoch 400/3000:  13%|█▎        | 400/3000 [02:54<18:05,  2.40it/s, v_num=1, train_loss_step=3.38e+6, train_loss_epoch=3.38e+6]Epoch 401/3000:  13%|█▎        | 400/3000 [02:54<18:05,  2.40it/s, v_num=1, train_loss_step=3.38e+6, train_loss_epoch=3.38e+6]Epoch 401/3000:  13%|█▎        | 401/3000 [02:55<18:10,  2.38it/s, v_num=1, train_loss_step=3.38e+6, train_loss_epoch=3.38e+6]Epoch 401/3000:  13%|█▎        | 401/3000 [02:55<18:10,  2.38it/s, v_num=1, train_loss_step=3.49e+6, train_loss_epoch=3.37e+6]Epoch 402/3000:  13%|█▎        | 401/3000 [02:55<18:10,  2.38it/s, v_num=1, train_loss_step=3.49e+6, train_loss_epoch=3.37e+6]Epoch 402/3000:  13%|█▎        | 402/3000 [02:55<18:15,  2.37it/s, v_num=1, train_loss_step=3.49e+6, train_loss_epoch=3.37e+6]Epoch 402/3000:  13%|█▎        | 402/3000 [02:55<18:15,  2.37it/s, v_num=1, train_loss_step=3.22e+6, train_loss_epoch=3.37e+6]Epoch 403/3000:  13%|█▎        | 402/3000 [02:55<18:15,  2.37it/s, v_num=1, train_loss_step=3.22e+6, train_loss_epoch=3.37e+6]Epoch 403/3000:  13%|█▎        | 403/3000 [02:56<19:28,  2.22it/s, v_num=1, train_loss_step=3.22e+6, train_loss_epoch=3.37e+6]Epoch 403/3000:  13%|█▎    
    | 403/3000 [02:56<19:28,  2.22it/s, v_num=1, train_loss_step=3.37e+6, train_loss_epoch=3.36e+6]Epoch 404/3000:  13%|█▎        | 403/3000 [02:56<19:28,  2.22it/s, v_num=1, train_loss_step=3.37e+6, train_loss_epoch=3.36e+6]Epoch 404/3000:  13%|█▎        | 404/3000 [02:56<19:39,  2.20it/s, v_num=1, train_loss_step=3.37e+6, train_loss_epoch=3.36e+6]Epoch 404/3000:  13%|█▎        | 404/3000 [02:56<19:39,  2.20it/s, v_num=1, train_loss_step=3e+6, train_loss_epoch=3.35e+6]   Epoch 405/3000:  13%|█▎        | 404/3000 [02:56<19:39,  2.20it/s, v_num=1, train_loss_step=3e+6, train_loss_epoch=3.35e+6]Epoch 405/3000:  14%|█▎        | 405/3000 [02:57<19:04,  2.27it/s, v_num=1, train_loss_step=3e+6, train_loss_epoch=3.35e+6]Epoch 405/3000:  14%|█▎        | 405/3000 [02:57<19:04,  2.27it/s, v_num=1, train_loss_step=3.1e+6, train_loss_epoch=3.35e+6]Epoch 406/3000:  14%|█▎        | 405/3000 [02:57<19:04,  2.27it/s, v_num=1, train_loss_step=3.1e+6, train_loss_epoch=3.35e+6]Epoch 406/3000:  14%|█▎        | 406/3000 [02:57<17:59,  2.40it/s, v_num=1, train_loss_step=3.1e+6, train_loss_epoch=3.35e+6]Epoch 406/3000:  14%|█▎        | 406/3000 [02:57<17:59,  2.40it/s, v_num=1, train_loss_step=3.13e+6, train_loss_epoch=3.34e+6]Epoch 407/3000:  14%|█▎        | 406/3000 [02:57<17:59,  2.40it/s, v_num=1, train_loss_step=3.13e+6, train_loss_epoch=3.34e+6]Epoch 407/3000:  14%|█▎        | 407/3000 [02:57<17:16,  2.50it/s, v_num=1, train_loss_step=3.13e+6, train_loss_epoch=3.34e+6]Epoch 407/3000:  14%|█▎        | 407/3000 [02:57<17:16,  2.50it/s, v_num=1, train_loss_step=3.43e+6, train_loss_epoch=3.33e+6]Epoch 408/3000:  14%|█▎        | 407/3000 [02:57<17:16,  2.50it/s, v_num=1, train_loss_step=3.43e+6, train_loss_epoch=3.33e+6]Epoch 408/3000:  14%|█▎        | 408/3000 [02:58<18:00,  2.40it/s, v_num=1, train_loss_step=3.43e+6, train_loss_epoch=3.33e+6]Epoch 408/3000:  14%|█▎        | 408/3000 [02:58<18:00,  2.40it/s, v_num=1, train_loss_step=3.26e+6, train_loss_epoch=3.33e+6]Epoch 409/3000:  
14%|█▎        | 408/3000 [02:58<18:00,  2.40it/s, v_num=1, train_loss_step=3.26e+6, train_loss_epoch=3.33e+6]Epoch 409/3000:  14%|█▎        | 409/3000 [02:58<18:09,  2.38it/s, v_num=1, train_loss_step=3.26e+6, train_loss_epoch=3.33e+6]Epoch 409/3000:  14%|█▎        | 409/3000 [02:58<18:09,  2.38it/s, v_num=1, train_loss_step=3.13e+6, train_loss_epoch=3.32e+6]Epoch 410/3000:  14%|█▎        | 409/3000 [02:58<18:09,  2.38it/s, v_num=1, train_loss_step=3.13e+6, train_loss_epoch=3.32e+6]Epoch 410/3000:  14%|█▎        | 410/3000 [02:59<17:26,  2.48it/s, v_num=1, train_loss_step=3.13e+6, train_loss_epoch=3.32e+6]Epoch 410/3000:  14%|█▎        | 410/3000 [02:59<17:26,  2.48it/s, v_num=1, train_loss_step=3.24e+6, train_loss_epoch=3.31e+6]Epoch 411/3000:  14%|█▎        | 410/3000 [02:59<17:26,  2.48it/s, v_num=1, train_loss_step=3.24e+6, train_loss_epoch=3.31e+6]Epoch 411/3000:  14%|█▎        | 411/3000 [02:59<16:14,  2.66it/s, v_num=1, train_loss_step=3.24e+6, train_loss_epoch=3.31e+6]Epoch 411/3000:  14%|█▎        | 411/3000 [02:59<16:14,  2.66it/s, v_num=1, train_loss_step=3.31e+6, train_loss_epoch=3.31e+6]Epoch 412/3000:  14%|█▎        | 411/3000 [02:59<16:14,  2.66it/s, v_num=1, train_loss_step=3.31e+6, train_loss_epoch=3.31e+6]Epoch 412/3000:  14%|█▎        | 412/3000 [02:59<15:40,  2.75it/s, v_num=1, train_loss_step=3.31e+6, train_loss_epoch=3.31e+6]Epoch 412/3000:  14%|█▎        | 412/3000 [02:59<15:40,  2.75it/s, v_num=1, train_loss_step=3.15e+6, train_loss_epoch=3.3e+6] Epoch 413/3000:  14%|█▎        | 412/3000 [02:59<15:40,  2.75it/s, v_num=1, train_loss_step=3.15e+6, train_loss_epoch=3.3e+6]Epoch 413/3000:  14%|█▍        | 413/3000 [03:00<16:26,  2.62it/s, v_num=1, train_loss_step=3.15e+6, train_loss_epoch=3.3e+6]Epoch 413/3000:  14%|█▍        | 413/3000 [03:00<16:26,  2.62it/s, v_num=1, train_loss_step=3.16e+6, train_loss_epoch=3.29e+6]Epoch 414/3000:  14%|█▍        | 413/3000 [03:00<16:26,  2.62it/s, v_num=1, train_loss_step=3.16e+6, 
train_loss_epoch=3.29e+6]Epoch 414/3000:  14%|█▍        | 414/3000 [03:00<17:26,  2.47it/s, v_num=1, train_loss_step=3.16e+6, train_loss_epoch=3.29e+6]Epoch 414/3000:  14%|█▍        | 414/3000 [03:00<17:26,  2.47it/s, v_num=1, train_loss_step=3.27e+6, train_loss_epoch=3.29e+6]Epoch 415/3000:  14%|█▍        | 414/3000 [03:00<17:26,  2.47it/s, v_num=1, train_loss_step=3.27e+6, train_loss_epoch=3.29e+6]Epoch 415/3000:  14%|█▍        | 415/3000 [03:01<18:09,  2.37it/s, v_num=1, train_loss_step=3.27e+6, train_loss_epoch=3.29e+6]Epoch 415/3000:  14%|█▍        | 415/3000 [03:01<18:09,  2.37it/s, v_num=1, train_loss_step=2.86e+6, train_loss_epoch=3.28e+6]Epoch 416/3000:  14%|█▍        | 415/3000 [03:01<18:09,  2.37it/s, v_num=1, train_loss_step=2.86e+6, train_loss_epoch=3.28e+6]Epoch 416/3000:  14%|█▍        | 416/3000 [03:01<19:13,  2.24it/s, v_num=1, train_loss_step=2.86e+6, train_loss_epoch=3.28e+6]Epoch 416/3000:  14%|█▍        | 416/3000 [03:01<19:13,  2.24it/s, v_num=1, train_loss_step=3.12e+6, train_loss_epoch=3.27e+6]Epoch 417/3000:  14%|█▍        | 416/3000 [03:01<19:13,  2.24it/s, v_num=1, train_loss_step=3.12e+6, train_loss_epoch=3.27e+6]Epoch 417/3000:  14%|█▍        | 417/3000 [03:02<19:16,  2.23it/s, v_num=1, train_loss_step=3.12e+6, train_loss_epoch=3.27e+6]Epoch 417/3000:  14%|█▍        | 417/3000 [03:02<19:16,  2.23it/s, v_num=1, train_loss_step=2.98e+6, train_loss_epoch=3.27e+6]Epoch 418/3000:  14%|█▍        | 417/3000 [03:02<19:16,  2.23it/s, v_num=1, train_loss_step=2.98e+6, train_loss_epoch=3.27e+6]Epoch 418/3000:  14%|█▍        | 418/3000 [03:02<18:26,  2.33it/s, v_num=1, train_loss_step=2.98e+6, train_loss_epoch=3.27e+6]Epoch 418/3000:  14%|█▍        | 418/3000 [03:02<18:26,  2.33it/s, v_num=1, train_loss_step=2.98e+6, train_loss_epoch=3.26e+6]Epoch 419/3000:  14%|█▍        | 418/3000 [03:02<18:26,  2.33it/s, v_num=1, train_loss_step=2.98e+6, train_loss_epoch=3.26e+6]Epoch 419/3000:  14%|█▍        | 419/3000 [03:02<18:46,  2.29it/s, v_num=1, 
train_loss_step=2.98e+6, train_loss_epoch=3.26e+6]Epoch 419/3000:  14%|█▍        | 419/3000 [03:02<18:46,  2.29it/s, v_num=1, train_loss_step=3.16e+6, train_loss_epoch=3.25e+6]Epoch 420/3000:  14%|█▍        | 419/3000 [03:02<18:46,  2.29it/s, v_num=1, train_loss_step=3.16e+6, train_loss_epoch=3.25e+6]Epoch 420/3000:  14%|█▍        | 420/3000 [03:03<18:59,  2.26it/s, v_num=1, train_loss_step=3.16e+6, train_loss_epoch=3.25e+6]Epoch 420/3000:  14%|█▍        | 420/3000 [03:03<18:59,  2.26it/s, v_num=1, train_loss_step=3.2e+6, train_loss_epoch=3.25e+6] Epoch 421/3000:  14%|█▍        | 420/3000 [03:03<18:59,  2.26it/s, v_num=1, train_loss_step=3.2e+6, train_loss_epoch=3.25e+6]Epoch 421/3000:  14%|█▍        | 421/3000 [03:03<19:12,  2.24it/s, v_num=1, train_loss_step=3.2e+6, train_loss_epoch=3.25e+6]Epoch 421/3000:  14%|█▍        | 421/3000 [03:03<19:12,  2.24it/s, v_num=1, train_loss_step=3.09e+6, train_loss_epoch=3.24e+6]Epoch 422/3000:  14%|█▍        | 421/3000 [03:03<19:12,  2.24it/s, v_num=1, train_loss_step=3.09e+6, train_loss_epoch=3.24e+6]Epoch 422/3000:  14%|█▍        | 422/3000 [03:04<18:34,  2.31it/s, v_num=1, train_loss_step=3.09e+6, train_loss_epoch=3.24e+6]Epoch 422/3000:  14%|█▍        | 422/3000 [03:04<18:34,  2.31it/s, v_num=1, train_loss_step=3.04e+6, train_loss_epoch=3.23e+6]Epoch 423/3000:  14%|█▍        | 422/3000 [03:04<18:34,  2.31it/s, v_num=1, train_loss_step=3.04e+6, train_loss_epoch=3.23e+6]Epoch 423/3000:  14%|█▍        | 423/3000 [03:04<18:23,  2.34it/s, v_num=1, train_loss_step=3.04e+6, train_loss_epoch=3.23e+6]Epoch 423/3000:  14%|█▍        | 423/3000 [03:04<18:23,  2.34it/s, v_num=1, train_loss_step=3.08e+6, train_loss_epoch=3.23e+6]Epoch 424/3000:  14%|█▍        | 423/3000 [03:04<18:23,  2.34it/s, v_num=1, train_loss_step=3.08e+6, train_loss_epoch=3.23e+6]Epoch 424/3000:  14%|█▍        | 424/3000 [03:05<18:41,  2.30it/s, v_num=1, train_loss_step=3.08e+6, train_loss_epoch=3.23e+6]Epoch 424/3000:  14%|█▍        | 424/3000 [03:05<18:41,  
2.30it/s, v_num=1, train_loss_step=3.18e+6, train_loss_epoch=3.22e+6]Epoch 425/3000:  14%|█▍        | 424/3000 [03:05<18:41,  2.30it/s, v_num=1, train_loss_step=3.18e+6, train_loss_epoch=3.22e+6]Epoch 425/3000:  14%|█▍        | 425/3000 [03:05<17:48,  2.41it/s, v_num=1, train_loss_step=3.18e+6, train_loss_epoch=3.22e+6]Epoch 425/3000:  14%|█▍        | 425/3000 [03:05<17:48,  2.41it/s, v_num=1, train_loss_step=3.27e+6, train_loss_epoch=3.22e+6]Epoch 426/3000:  14%|█▍        | 425/3000 [03:05<17:48,  2.41it/s, v_num=1, train_loss_step=3.27e+6, train_loss_epoch=3.22e+6]Epoch 426/3000:  14%|█▍        | 426/3000 [03:05<18:34,  2.31it/s, v_num=1, train_loss_step=3.27e+6, train_loss_epoch=3.22e+6]Epoch 426/3000:  14%|█▍        | 426/3000 [03:05<18:34,  2.31it/s, v_num=1, train_loss_step=3.32e+6, train_loss_epoch=3.21e+6]Epoch 427/3000:  14%|█▍        | 426/3000 [03:05<18:34,  2.31it/s, v_num=1, train_loss_step=3.32e+6, train_loss_epoch=3.21e+6]Epoch 427/3000:  14%|█▍        | 427/3000 [03:06<18:49,  2.28it/s, v_num=1, train_loss_step=3.32e+6, train_loss_epoch=3.21e+6]Epoch 427/3000:  14%|█▍        | 427/3000 [03:06<18:49,  2.28it/s, v_num=1, train_loss_step=3.33e+6, train_loss_epoch=3.2e+6] Epoch 428/3000:  14%|█▍        | 427/3000 [03:06<18:49,  2.28it/s, v_num=1, train_loss_step=3.33e+6, train_loss_epoch=3.2e+6]Epoch 428/3000:  14%|█▍        | 428/3000 [03:06<18:49,  2.28it/s, v_num=1, train_loss_step=3.33e+6, train_loss_epoch=3.2e+6]Epoch 428/3000:  14%|█▍        | 428/3000 [03:06<18:49,  2.28it/s, v_num=1, train_loss_step=3.13e+6, train_loss_epoch=3.2e+6]Epoch 429/3000:  14%|█▍        | 428/3000 [03:06<18:49,  2.28it/s, v_num=1, train_loss_step=3.13e+6, train_loss_epoch=3.2e+6]Epoch 429/3000:  14%|█▍        | 429/3000 [03:07<18:30,  2.32it/s, v_num=1, train_loss_step=3.13e+6, train_loss_epoch=3.2e+6]Epoch 429/3000:  14%|█▍        | 429/3000 [03:07<18:30,  2.32it/s, v_num=1, train_loss_step=3.11e+6, train_loss_epoch=3.19e+6]Epoch 430/3000:  14%|█▍        | 429/3000 
[03:07<18:30,  2.32it/s, v_num=1, train_loss_step=3.11e+6, train_loss_epoch=3.19e+6]Epoch 430/3000:  14%|█▍        | 430/3000 [03:07<18:27,  2.32it/s, v_num=1, train_loss_step=3.11e+6, train_loss_epoch=3.19e+6]Epoch 430/3000:  14%|█▍        | 430/3000 [03:07<18:27,  2.32it/s, v_num=1, train_loss_step=3.18e+6, train_loss_epoch=3.19e+6]Epoch 431/3000:  14%|█▍        | 430/3000 [03:07<18:27,  2.32it/s, v_num=1, train_loss_step=3.18e+6, train_loss_epoch=3.19e+6]Epoch 431/3000:  14%|█▍        | 431/3000 [03:08<18:22,  2.33it/s, v_num=1, train_loss_step=3.18e+6, train_loss_epoch=3.19e+6]Epoch 431/3000:  14%|█▍        | 431/3000 [03:08<18:22,  2.33it/s, v_num=1, train_loss_step=3.16e+6, train_loss_epoch=3.18e+6]Epoch 432/3000:  14%|█▍        | 431/3000 [03:08<18:22,  2.33it/s, v_num=1, train_loss_step=3.16e+6, train_loss_epoch=3.18e+6]Epoch 432/3000:  14%|█▍        | 432/3000 [03:08<17:57,  2.38it/s, v_num=1, train_loss_step=3.16e+6, train_loss_epoch=3.18e+6]Epoch 432/3000:  14%|█▍        | 432/3000 [03:08<17:57,  2.38it/s, v_num=1, train_loss_step=3.29e+6, train_loss_epoch=3.17e+6]Epoch 433/3000:  14%|█▍        | 432/3000 [03:08<17:57,  2.38it/s, v_num=1, train_loss_step=3.29e+6, train_loss_epoch=3.17e+6]Epoch 433/3000:  14%|█▍        | 433/3000 [03:08<17:42,  2.41it/s, v_num=1, train_loss_step=3.29e+6, train_loss_epoch=3.17e+6]Epoch 433/3000:  14%|█▍        | 433/3000 [03:08<17:42,  2.41it/s, v_num=1, train_loss_step=3.06e+6, train_loss_epoch=3.17e+6]Epoch 434/3000:  14%|█▍        | 433/3000 [03:08<17:42,  2.41it/s, v_num=1, train_loss_step=3.06e+6, train_loss_epoch=3.17e+6]Epoch 434/3000:  14%|█▍        | 434/3000 [03:09<16:31,  2.59it/s, v_num=1, train_loss_step=3.06e+6, train_loss_epoch=3.17e+6]Epoch 434/3000:  14%|█▍        | 434/3000 [03:09<16:31,  2.59it/s, v_num=1, train_loss_step=3.3e+6, train_loss_epoch=3.16e+6] Epoch 435/3000:  14%|█▍        | 434/3000 [03:09<16:31,  2.59it/s, v_num=1, train_loss_step=3.3e+6, train_loss_epoch=3.16e+6]Epoch 435/3000:  14%|█▍    
    | 435/3000 [03:09<16:37,  2.57it/s, v_num=1, train_loss_step=3.3e+6, train_loss_epoch=3.16e+6]Epoch 435/3000:  14%|█▍        | 435/3000 [03:09<16:37,  2.57it/s, v_num=1, train_loss_step=3.18e+6, train_loss_epoch=3.16e+6]Epoch 436/3000:  14%|█▍        | 435/3000 [03:09<16:37,  2.57it/s, v_num=1, train_loss_step=3.18e+6, train_loss_epoch=3.16e+6]Epoch 436/3000:  15%|█▍        | 436/3000 [03:09<17:13,  2.48it/s, v_num=1, train_loss_step=3.18e+6, train_loss_epoch=3.16e+6]Epoch 436/3000:  15%|█▍        | 436/3000 [03:09<17:13,  2.48it/s, v_num=1, train_loss_step=3.23e+6, train_loss_epoch=3.15e+6]Epoch 437/3000:  15%|█▍        | 436/3000 [03:09<17:13,  2.48it/s, v_num=1, train_loss_step=3.23e+6, train_loss_epoch=3.15e+6]Epoch 437/3000:  15%|█▍        | 437/3000 [03:10<17:51,  2.39it/s, v_num=1, train_loss_step=3.23e+6, train_loss_epoch=3.15e+6]Epoch 437/3000:  15%|█▍        | 437/3000 [03:10<17:51,  2.39it/s, v_num=1, train_loss_step=3.07e+6, train_loss_epoch=3.14e+6]Epoch 438/3000:  15%|█▍        | 437/3000 [03:10<17:51,  2.39it/s, v_num=1, train_loss_step=3.07e+6, train_loss_epoch=3.14e+6]Epoch 438/3000:  15%|█▍        | 438/3000 [03:10<18:17,  2.34it/s, v_num=1, train_loss_step=3.07e+6, train_loss_epoch=3.14e+6]Epoch 438/3000:  15%|█▍        | 438/3000 [03:10<18:17,  2.34it/s, v_num=1, train_loss_step=3.2e+6, train_loss_epoch=3.14e+6] Epoch 439/3000:  15%|█▍        | 438/3000 [03:10<18:17,  2.34it/s, v_num=1, train_loss_step=3.2e+6, train_loss_epoch=3.14e+6]Epoch 439/3000:  15%|█▍        | 439/3000 [03:11<18:30,  2.31it/s, v_num=1, train_loss_step=3.2e+6, train_loss_epoch=3.14e+6]Epoch 439/3000:  15%|█▍        | 439/3000 [03:11<18:30,  2.31it/s, v_num=1, train_loss_step=3.03e+6, train_loss_epoch=3.13e+6]Epoch 440/3000:  15%|█▍        | 439/3000 [03:11<18:30,  2.31it/s, v_num=1, train_loss_step=3.03e+6, train_loss_epoch=3.13e+6]Epoch 440/3000:  15%|█▍        | 440/3000 [03:11<19:19,  2.21it/s, v_num=1, train_loss_step=3.03e+6, train_loss_epoch=3.13e+6]Epoch 
440/3000:  15%|█▍        | 440/3000 [03:11<19:19,  2.21it/s, v_num=1, train_loss_step=3.03e+6, train_loss_epoch=3.13e+6]Epoch 441/3000:  15%|█▍        | 440/3000 [03:11<19:19,  2.21it/s, v_num=1, train_loss_step=3.03e+6, train_loss_epoch=3.13e+6]Epoch 441/3000:  15%|█▍        | 441/3000 [03:12<19:41,  2.17it/s, v_num=1, train_loss_step=3.03e+6, train_loss_epoch=3.13e+6]Epoch 441/3000:  15%|█▍        | 441/3000 [03:12<19:41,  2.17it/s, v_num=1, train_loss_step=2.79e+6, train_loss_epoch=3.12e+6]Epoch 442/3000:  15%|█▍        | 441/3000 [03:12<19:41,  2.17it/s, v_num=1, train_loss_step=2.79e+6, train_loss_epoch=3.12e+6]Epoch 442/3000:  15%|█▍        | 442/3000 [03:12<18:53,  2.26it/s, v_num=1, train_loss_step=2.79e+6, train_loss_epoch=3.12e+6]Epoch 442/3000:  15%|█▍        | 442/3000 [03:12<18:53,  2.26it/s, v_num=1, train_loss_step=3.01e+6, train_loss_epoch=3.11e+6]Epoch 443/3000:  15%|█▍        | 442/3000 [03:12<18:53,  2.26it/s, v_num=1, train_loss_step=3.01e+6, train_loss_epoch=3.11e+6]Epoch 443/3000:  15%|█▍        | 443/3000 [03:13<18:33,  2.30it/s, v_num=1, train_loss_step=3.01e+6, train_loss_epoch=3.11e+6]Epoch 443/3000:  15%|█▍        | 443/3000 [03:13<18:33,  2.30it/s, v_num=1, train_loss_step=3.13e+6, train_loss_epoch=3.11e+6]Epoch 444/3000:  15%|█▍        | 443/3000 [03:13<18:33,  2.30it/s, v_num=1, train_loss_step=3.13e+6, train_loss_epoch=3.11e+6]Epoch 444/3000:  15%|█▍        | 444/3000 [03:13<18:20,  2.32it/s, v_num=1, train_loss_step=3.13e+6, train_loss_epoch=3.11e+6]Epoch 444/3000:  15%|█▍        | 444/3000 [03:13<18:20,  2.32it/s, v_num=1, train_loss_step=3.21e+6, train_loss_epoch=3.1e+6] Epoch 445/3000:  15%|█▍        | 444/3000 [03:13<18:20,  2.32it/s, v_num=1, train_loss_step=3.21e+6, train_loss_epoch=3.1e+6]Epoch 445/3000:  15%|█▍        | 445/3000 [03:14<19:06,  2.23it/s, v_num=1, train_loss_step=3.21e+6, train_loss_epoch=3.1e+6]Epoch 445/3000:  15%|█▍        | 445/3000 [03:14<19:06,  2.23it/s, v_num=1, train_loss_step=3.15e+6, 
train_loss_epoch=3.1e+6]Epoch 446/3000:  15%|█▍        | 445/3000 [03:14<19:06,  2.23it/s, v_num=1, train_loss_step=3.15e+6, train_loss_epoch=3.1e+6]Epoch 446/3000:  15%|█▍        | 446/3000 [03:14<18:39,  2.28it/s, v_num=1, train_loss_step=3.15e+6, train_loss_epoch=3.1e+6]Epoch 446/3000:  15%|█▍        | 446/3000 [03:14<18:39,  2.28it/s, v_num=1, train_loss_step=2.99e+6, train_loss_epoch=3.09e+6]Epoch 447/3000:  15%|█▍        | 446/3000 [03:14<18:39,  2.28it/s, v_num=1, train_loss_step=2.99e+6, train_loss_epoch=3.09e+6]Epoch 447/3000:  15%|█▍        | 447/3000 [03:14<18:10,  2.34it/s, v_num=1, train_loss_step=2.99e+6, train_loss_epoch=3.09e+6]Epoch 447/3000:  15%|█▍        | 447/3000 [03:14<18:10,  2.34it/s, v_num=1, train_loss_step=2.85e+6, train_loss_epoch=3.09e+6]Epoch 448/3000:  15%|█▍        | 447/3000 [03:14<18:10,  2.34it/s, v_num=1, train_loss_step=2.85e+6, train_loss_epoch=3.09e+6]Epoch 448/3000:  15%|█▍        | 448/3000 [03:15<17:55,  2.37it/s, v_num=1, train_loss_step=2.85e+6, train_loss_epoch=3.09e+6]Epoch 448/3000:  15%|█▍        | 448/3000 [03:15<17:55,  2.37it/s, v_num=1, train_loss_step=3.08e+6, train_loss_epoch=3.08e+6]Epoch 449/3000:  15%|█▍        | 448/3000 [03:15<17:55,  2.37it/s, v_num=1, train_loss_step=3.08e+6, train_loss_epoch=3.08e+6]Epoch 449/3000:  15%|█▍        | 449/3000 [03:15<18:20,  2.32it/s, v_num=1, train_loss_step=3.08e+6, train_loss_epoch=3.08e+6]Epoch 449/3000:  15%|█▍        | 449/3000 [03:15<18:20,  2.32it/s, v_num=1, train_loss_step=3.19e+6, train_loss_epoch=3.07e+6]Epoch 450/3000:  15%|█▍        | 449/3000 [03:15<18:20,  2.32it/s, v_num=1, train_loss_step=3.19e+6, train_loss_epoch=3.07e+6]Epoch 450/3000:  15%|█▌        | 450/3000 [03:16<18:03,  2.35it/s, v_num=1, train_loss_step=3.19e+6, train_loss_epoch=3.07e+6]Epoch 450/3000:  15%|█▌        | 450/3000 [03:16<18:03,  2.35it/s, v_num=1, train_loss_step=3.04e+6, train_loss_epoch=3.07e+6]Epoch 451/3000:  15%|█▌        | 450/3000 [03:16<18:03,  2.35it/s, v_num=1, 
train_loss_step=3.04e+6, train_loss_epoch=3.07e+6]Epoch 451/3000:  15%|█▌        | 451/3000 [03:16<17:22,  2.44it/s, v_num=1, train_loss_step=3.04e+6, train_loss_epoch=3.07e+6]Epoch 451/3000:  15%|█▌        | 451/3000 [03:16<17:22,  2.44it/s, v_num=1, train_loss_step=3.26e+6, train_loss_epoch=3.06e+6]Epoch 452/3000:  15%|█▌        | 451/3000 [03:16<17:22,  2.44it/s, v_num=1, train_loss_step=3.26e+6, train_loss_epoch=3.06e+6]Epoch 452/3000:  15%|█▌        | 452/3000 [03:16<17:40,  2.40it/s, v_num=1, train_loss_step=3.26e+6, train_loss_epoch=3.06e+6]Epoch 452/3000:  15%|█▌        | 452/3000 [03:16<17:40,  2.40it/s, v_num=1, train_loss_step=2.94e+6, train_loss_epoch=3.06e+6]Epoch 453/3000:  15%|█▌        | 452/3000 [03:16<17:40,  2.40it/s, v_num=1, train_loss_step=2.94e+6, train_loss_epoch=3.06e+6]Epoch 453/3000:  15%|█▌        | 453/3000 [03:17<18:32,  2.29it/s, v_num=1, train_loss_step=2.94e+6, train_loss_epoch=3.06e+6]Epoch 453/3000:  15%|█▌        | 453/3000 [03:17<18:32,  2.29it/s, v_num=1, train_loss_step=3.18e+6, train_loss_epoch=3.05e+6]Epoch 454/3000:  15%|█▌        | 453/3000 [03:17<18:32,  2.29it/s, v_num=1, train_loss_step=3.18e+6, train_loss_epoch=3.05e+6]Epoch 454/3000:  15%|█▌        | 454/3000 [03:17<18:42,  2.27it/s, v_num=1, train_loss_step=3.18e+6, train_loss_epoch=3.05e+6]Epoch 454/3000:  15%|█▌        | 454/3000 [03:17<18:42,  2.27it/s, v_num=1, train_loss_step=2.83e+6, train_loss_epoch=3.05e+6]Epoch 455/3000:  15%|█▌        | 454/3000 [03:17<18:42,  2.27it/s, v_num=1, train_loss_step=2.83e+6, train_loss_epoch=3.05e+6]Epoch 455/3000:  15%|█▌        | 455/3000 [03:18<19:26,  2.18it/s, v_num=1, train_loss_step=2.83e+6, train_loss_epoch=3.05e+6]Epoch 455/3000:  15%|█▌        | 455/3000 [03:18<19:26,  2.18it/s, v_num=1, train_loss_step=3.03e+6, train_loss_epoch=3.04e+6]Epoch 456/3000:  15%|█▌        | 455/3000 [03:18<19:26,  2.18it/s, v_num=1, train_loss_step=3.03e+6, train_loss_epoch=3.04e+6]Epoch 456/3000:  15%|█▌        | 456/3000 [03:18<19:11,  
2.21it/s, v_num=1, train_loss_step=3.03e+6, train_loss_epoch=3.04e+6]Epoch 456/3000:  15%|█▌        | 456/3000 [03:18<19:11,  2.21it/s, v_num=1, train_loss_step=2.98e+6, train_loss_epoch=3.04e+6]Epoch 457/3000:  15%|█▌        | 456/3000 [03:18<19:11,  2.21it/s, v_num=1, train_loss_step=2.98e+6, train_loss_epoch=3.04e+6]Epoch 457/3000:  15%|█▌        | 457/3000 [03:19<18:50,  2.25it/s, v_num=1, train_loss_step=2.98e+6, train_loss_epoch=3.04e+6]Epoch 457/3000:  15%|█▌        | 457/3000 [03:19<18:50,  2.25it/s, v_num=1, train_loss_step=2.98e+6, train_loss_epoch=3.03e+6]Epoch 458/3000:  15%|█▌        | 457/3000 [03:19<18:50,  2.25it/s, v_num=1, train_loss_step=2.98e+6, train_loss_epoch=3.03e+6]Epoch 458/3000:  15%|█▌        | 458/3000 [03:19<18:02,  2.35it/s, v_num=1, train_loss_step=2.98e+6, train_loss_epoch=3.03e+6]Epoch 458/3000:  15%|█▌        | 458/3000 [03:19<18:02,  2.35it/s, v_num=1, train_loss_step=3.04e+6, train_loss_epoch=3.03e+6]Epoch 459/3000:  15%|█▌        | 458/3000 [03:19<18:02,  2.35it/s, v_num=1, train_loss_step=3.04e+6, train_loss_epoch=3.03e+6]Epoch 459/3000:  15%|█▌        | 459/3000 [03:20<18:05,  2.34it/s, v_num=1, train_loss_step=3.04e+6, train_loss_epoch=3.03e+6]Epoch 459/3000:  15%|█▌        | 459/3000 [03:20<18:05,  2.34it/s, v_num=1, train_loss_step=3.13e+6, train_loss_epoch=3.02e+6]Epoch 460/3000:  15%|█▌        | 459/3000 [03:20<18:05,  2.34it/s, v_num=1, train_loss_step=3.13e+6, train_loss_epoch=3.02e+6]Epoch 460/3000:  15%|█▌        | 460/3000 [03:20<17:28,  2.42it/s, v_num=1, train_loss_step=3.13e+6, train_loss_epoch=3.02e+6]Epoch 460/3000:  15%|█▌        | 460/3000 [03:20<17:28,  2.42it/s, v_num=1, train_loss_step=2.88e+6, train_loss_epoch=3.02e+6]Epoch 461/3000:  15%|█▌        | 460/3000 [03:20<17:28,  2.42it/s, v_num=1, train_loss_step=2.88e+6, train_loss_epoch=3.02e+6]Epoch 461/3000:  15%|█▌        | 461/3000 [03:20<17:41,  2.39it/s, v_num=1, train_loss_step=2.88e+6, train_loss_epoch=3.02e+6]Epoch 461/3000:  15%|█▌        | 
461/3000 [03:20<17:41,  2.39it/s, v_num=1, train_loss_step=3.09e+6, train_loss_epoch=3.01e+6]Epoch 462/3000:  15%|█▌        | 461/3000 [03:20<17:41,  2.39it/s, v_num=1, train_loss_step=3.09e+6, train_loss_epoch=3.01e+6]Epoch 462/3000:  15%|█▌        | 462/3000 [03:21<17:03,  2.48it/s, v_num=1, train_loss_step=3.09e+6, train_loss_epoch=3.01e+6]Epoch 462/3000:  15%|█▌        | 462/3000 [03:21<17:03,  2.48it/s, v_num=1, train_loss_step=3.08e+6, train_loss_epoch=3e+6]   Epoch 463/3000:  15%|█▌        | 462/3000 [03:21<17:03,  2.48it/s, v_num=1, train_loss_step=3.08e+6, train_loss_epoch=3e+6]Epoch 463/3000:  15%|█▌        | 463/3000 [03:21<17:08,  2.47it/s, v_num=1, train_loss_step=3.08e+6, train_loss_epoch=3e+6]Epoch 463/3000:  15%|█▌        | 463/3000 [03:21<17:08,  2.47it/s, v_num=1, train_loss_step=2.92e+6, train_loss_epoch=3e+6]Epoch 464/3000:  15%|█▌        | 463/3000 [03:21<17:08,  2.47it/s, v_num=1, train_loss_step=2.92e+6, train_loss_epoch=3e+6]Epoch 464/3000:  15%|█▌        | 464/3000 [03:22<17:29,  2.42it/s, v_num=1, train_loss_step=2.92e+6, train_loss_epoch=3e+6]Epoch 464/3000:  15%|█▌        | 464/3000 [03:22<17:29,  2.42it/s, v_num=1, train_loss_step=3.2e+6, train_loss_epoch=2.99e+6]Epoch 465/3000:  15%|█▌        | 464/3000 [03:22<17:29,  2.42it/s, v_num=1, train_loss_step=3.2e+6, train_loss_epoch=2.99e+6]Epoch 465/3000:  16%|█▌        | 465/3000 [03:22<18:35,  2.27it/s, v_num=1, train_loss_step=3.2e+6, train_loss_epoch=2.99e+6]Epoch 465/3000:  16%|█▌        | 465/3000 [03:22<18:35,  2.27it/s, v_num=1, train_loss_step=2.88e+6, train_loss_epoch=2.99e+6]Epoch 466/3000:  16%|█▌        | 465/3000 [03:22<18:35,  2.27it/s, v_num=1, train_loss_step=2.88e+6, train_loss_epoch=2.99e+6]Epoch 466/3000:  16%|█▌        | 466/3000 [03:23<18:48,  2.25it/s, v_num=1, train_loss_step=2.88e+6, train_loss_epoch=2.99e+6]Epoch 466/3000:  16%|█▌        | 466/3000 [03:23<18:48,  2.25it/s, v_num=1, train_loss_step=3.11e+6, train_loss_epoch=2.98e+6]Epoch 467/3000:  16%|█▌        | 
466/3000 [03:23<18:48,  2.25it/s, v_num=1, train_loss_step=3.11e+6, train_loss_epoch=2.98e+6]Epoch 467/3000:  16%|█▌        | 467/3000 [03:23<17:03,  2.47it/s, v_num=1, train_loss_step=3.11e+6, train_loss_epoch=2.98e+6]Epoch 467/3000:  16%|█▌        | 467/3000 [03:23<17:03,  2.47it/s, v_num=1, train_loss_step=3.08e+6, train_loss_epoch=2.98e+6]Epoch 468/3000:  16%|█▌        | 467/3000 [03:23<17:03,  2.47it/s, v_num=1, train_loss_step=3.08e+6, train_loss_epoch=2.98e+6]Epoch 468/3000:  16%|█▌        | 468/3000 [03:23<17:00,  2.48it/s, v_num=1, train_loss_step=3.08e+6, train_loss_epoch=2.98e+6]Epoch 468/3000:  16%|█▌        | 468/3000 [03:23<17:00,  2.48it/s, v_num=1, train_loss_step=2.76e+6, train_loss_epoch=2.97e+6]Epoch 469/3000:  16%|█▌        | 468/3000 [03:23<17:00,  2.48it/s, v_num=1, train_loss_step=2.76e+6, train_loss_epoch=2.97e+6]Epoch 469/3000:  16%|█▌        | 469/3000 [03:24<17:13,  2.45it/s, v_num=1, train_loss_step=2.76e+6, train_loss_epoch=2.97e+6]Epoch 469/3000:  16%|█▌        | 469/3000 [03:24<17:13,  2.45it/s, v_num=1, train_loss_step=3.05e+6, train_loss_epoch=2.97e+6]Epoch 470/3000:  16%|█▌        | 469/3000 [03:24<17:13,  2.45it/s, v_num=1, train_loss_step=3.05e+6, train_loss_epoch=2.97e+6]Epoch 470/3000:  16%|█▌        | 470/3000 [03:24<18:01,  2.34it/s, v_num=1, train_loss_step=3.05e+6, train_loss_epoch=2.97e+6]Epoch 470/3000:  16%|█▌        | 470/3000 [03:24<18:01,  2.34it/s, v_num=1, train_loss_step=2.98e+6, train_loss_epoch=2.96e+6]Epoch 471/3000:  16%|█▌        | 470/3000 [03:24<18:01,  2.34it/s, v_num=1, train_loss_step=2.98e+6, train_loss_epoch=2.96e+6]Epoch 471/3000:  16%|█▌        | 471/3000 [03:25<18:45,  2.25it/s, v_num=1, train_loss_step=2.98e+6, train_loss_epoch=2.96e+6]Epoch 471/3000:  16%|█▌        | 471/3000 [03:25<18:45,  2.25it/s, v_num=1, train_loss_step=3.07e+6, train_loss_epoch=2.96e+6]Epoch 472/3000:  16%|█▌        | 471/3000 [03:25<18:45,  2.25it/s, v_num=1, train_loss_step=3.07e+6, train_loss_epoch=2.96e+6]Epoch 472/3000:  
16%|█▌        | 472/3000 [03:25<18:06,  2.33it/s, v_num=1, train_loss_step=3.07e+6, train_loss_epoch=2.96e+6]Epoch 472/3000:  16%|█▌        | 472/3000 [03:25<18:06,  2.33it/s, v_num=1, train_loss_step=2.84e+6, train_loss_epoch=2.95e+6]Epoch 473/3000:  16%|█▌        | 472/3000 [03:25<18:06,  2.33it/s, v_num=1, train_loss_step=2.84e+6, train_loss_epoch=2.95e+6]Epoch 473/3000:  16%|█▌        | 473/3000 [03:26<18:53,  2.23it/s, v_num=1, train_loss_step=2.84e+6, train_loss_epoch=2.95e+6]Epoch 473/3000:  16%|█▌        | 473/3000 [03:26<18:53,  2.23it/s, v_num=1, train_loss_step=3.11e+6, train_loss_epoch=2.95e+6]Epoch 474/3000:  16%|█▌        | 473/3000 [03:26<18:53,  2.23it/s, v_num=1, train_loss_step=3.11e+6, train_loss_epoch=2.95e+6]Epoch 474/3000:  16%|█▌        | 474/3000 [03:26<18:09,  2.32it/s, v_num=1, train_loss_step=3.11e+6, train_loss_epoch=2.95e+6]Epoch 474/3000:  16%|█▌        | 474/3000 [03:26<18:09,  2.32it/s, v_num=1, train_loss_step=2.81e+6, train_loss_epoch=2.94e+6]Epoch 475/3000:  16%|█▌        | 474/3000 [03:26<18:09,  2.32it/s, v_num=1, train_loss_step=2.81e+6, train_loss_epoch=2.94e+6]Epoch 475/3000:  16%|█▌        | 475/3000 [03:26<18:29,  2.28it/s, v_num=1, train_loss_step=2.81e+6, train_loss_epoch=2.94e+6]Epoch 475/3000:  16%|█▌        | 475/3000 [03:26<18:29,  2.28it/s, v_num=1, train_loss_step=2.97e+6, train_loss_epoch=2.94e+6]Epoch 476/3000:  16%|█▌        | 475/3000 [03:26<18:29,  2.28it/s, v_num=1, train_loss_step=2.97e+6, train_loss_epoch=2.94e+6]Epoch 476/3000:  16%|█▌        | 476/3000 [03:27<18:29,  2.28it/s, v_num=1, train_loss_step=2.97e+6, train_loss_epoch=2.94e+6]Epoch 476/3000:  16%|█▌        | 476/3000 [03:27<18:29,  2.28it/s, v_num=1, train_loss_step=2.96e+6, train_loss_epoch=2.93e+6]Epoch 477/3000:  16%|█▌        | 476/3000 [03:27<18:29,  2.28it/s, v_num=1, train_loss_step=2.96e+6, train_loss_epoch=2.93e+6]Epoch 477/3000:  16%|█▌        | 477/3000 [03:27<18:24,  2.28it/s, v_num=1, train_loss_step=2.96e+6, 
train_loss_epoch=2.93e+6]Epoch 477/3000:  16%|█▌        | 477/3000 [03:27<18:24,  2.28it/s, v_num=1, train_loss_step=2.71e+6, train_loss_epoch=2.93e+6]Epoch 478/3000:  16%|█▌        | 477/3000 [03:27<18:24,  2.28it/s, v_num=1, train_loss_step=2.71e+6, train_loss_epoch=2.93e+6]Epoch 478/3000:  16%|█▌        | 478/3000 [03:28<18:53,  2.22it/s, v_num=1, train_loss_step=2.71e+6, train_loss_epoch=2.93e+6]Epoch 478/3000:  16%|█▌        | 478/3000 [03:28<18:53,  2.22it/s, v_num=1, train_loss_step=2.92e+6, train_loss_epoch=2.92e+6]Epoch 479/3000:  16%|█▌        | 478/3000 [03:28<18:53,  2.22it/s, v_num=1, train_loss_step=2.92e+6, train_loss_epoch=2.92e+6]Epoch 479/3000:  16%|█▌        | 479/3000 [03:28<18:07,  2.32it/s, v_num=1, train_loss_step=2.92e+6, train_loss_epoch=2.92e+6]Epoch 479/3000:  16%|█▌        | 479/3000 [03:28<18:07,  2.32it/s, v_num=1, train_loss_step=2.83e+6, train_loss_epoch=2.92e+6]Epoch 480/3000:  16%|█▌        | 479/3000 [03:28<18:07,  2.32it/s, v_num=1, train_loss_step=2.83e+6, train_loss_epoch=2.92e+6]Epoch 480/3000:  16%|█▌        | 480/3000 [03:28<16:24,  2.56it/s, v_num=1, train_loss_step=2.83e+6, train_loss_epoch=2.92e+6]Epoch 480/3000:  16%|█▌        | 480/3000 [03:28<16:24,  2.56it/s, v_num=1, train_loss_step=3.1e+6, train_loss_epoch=2.91e+6] Epoch 481/3000:  16%|█▌        | 480/3000 [03:28<16:24,  2.56it/s, v_num=1, train_loss_step=3.1e+6, train_loss_epoch=2.91e+6]Epoch 481/3000:  16%|█▌        | 481/3000 [03:29<17:01,  2.47it/s, v_num=1, train_loss_step=3.1e+6, train_loss_epoch=2.91e+6]Epoch 481/3000:  16%|█▌        | 481/3000 [03:29<17:01,  2.47it/s, v_num=1, train_loss_step=2.55e+6, train_loss_epoch=2.91e+6]Epoch 482/3000:  16%|█▌        | 481/3000 [03:29<17:01,  2.47it/s, v_num=1, train_loss_step=2.55e+6, train_loss_epoch=2.91e+6]Epoch 482/3000:  16%|█▌        | 482/3000 [03:29<17:21,  2.42it/s, v_num=1, train_loss_step=2.55e+6, train_loss_epoch=2.91e+6]Epoch 482/3000:  16%|█▌        | 482/3000 [03:29<17:21,  2.42it/s, v_num=1, 
train_loss_step=2.95e+6, train_loss_epoch=2.9e+6] Epoch 483/3000:  16%|█▌        | 482/3000 [03:29<17:21,  2.42it/s, v_num=1, train_loss_step=2.95e+6, train_loss_epoch=2.9e+6]Epoch 483/3000:  16%|█▌        | 483/3000 [03:30<17:04,  2.46it/s, v_num=1, train_loss_step=2.95e+6, train_loss_epoch=2.9e+6]Epoch 483/3000:  16%|█▌        | 483/3000 [03:30<17:04,  2.46it/s, v_num=1, train_loss_step=2.84e+6, train_loss_epoch=2.9e+6]Epoch 484/3000:  16%|█▌        | 483/3000 [03:30<17:04,  2.46it/s, v_num=1, train_loss_step=2.84e+6, train_loss_epoch=2.9e+6]Epoch 484/3000:  16%|█▌        | 484/3000 [03:30<16:16,  2.58it/s, v_num=1, train_loss_step=2.84e+6, train_loss_epoch=2.9e+6]Epoch 484/3000:  16%|█▌        | 484/3000 [03:30<16:16,  2.58it/s, v_num=1, train_loss_step=3.31e+6, train_loss_epoch=2.9e+6]Epoch 485/3000:  16%|█▌        | 484/3000 [03:30<16:16,  2.58it/s, v_num=1, train_loss_step=3.31e+6, train_loss_epoch=2.9e+6]Epoch 485/3000:  16%|█▌        | 485/3000 [03:30<16:42,  2.51it/s, v_num=1, train_loss_step=3.31e+6, train_loss_epoch=2.9e+6]Epoch 485/3000:  16%|█▌        | 485/3000 [03:30<16:42,  2.51it/s, v_num=1, train_loss_step=2.76e+6, train_loss_epoch=2.89e+6]Epoch 486/3000:  16%|█▌        | 485/3000 [03:30<16:42,  2.51it/s, v_num=1, train_loss_step=2.76e+6, train_loss_epoch=2.89e+6]Epoch 486/3000:  16%|█▌        | 486/3000 [03:31<17:16,  2.43it/s, v_num=1, train_loss_step=2.76e+6, train_loss_epoch=2.89e+6]Epoch 486/3000:  16%|█▌        | 486/3000 [03:31<17:16,  2.43it/s, v_num=1, train_loss_step=2.67e+6, train_loss_epoch=2.89e+6]Epoch 487/3000:  16%|█▌        | 486/3000 [03:31<17:16,  2.43it/s, v_num=1, train_loss_step=2.67e+6, train_loss_epoch=2.89e+6]Epoch 487/3000:  16%|█▌        | 487/3000 [03:31<19:21,  2.16it/s, v_num=1, train_loss_step=2.67e+6, train_loss_epoch=2.89e+6]Epoch 487/3000:  16%|█▌        | 487/3000 [03:31<19:21,  2.16it/s, v_num=1, train_loss_step=2.88e+6, train_loss_epoch=2.88e+6]Epoch 488/3000:  16%|█▌        | 487/3000 [03:31<19:21,  2.16it/s, 
v_num=1, train_loss_step=2.88e+6, train_loss_epoch=2.88e+6]Epoch 488/3000:  16%|█▋        | 488/3000 [03:32<19:12,  2.18it/s, v_num=1, train_loss_step=2.88e+6, train_loss_epoch=2.88e+6]Epoch 488/3000:  16%|█▋        | 488/3000 [03:32<19:12,  2.18it/s, v_num=1, train_loss_step=2.84e+6, train_loss_epoch=2.88e+6]Epoch 489/3000:  16%|█▋        | 488/3000 [03:32<19:12,  2.18it/s, v_num=1, train_loss_step=2.84e+6, train_loss_epoch=2.88e+6]Epoch 489/3000:  16%|█▋        | 489/3000 [03:32<19:35,  2.14it/s, v_num=1, train_loss_step=2.84e+6, train_loss_epoch=2.88e+6]Epoch 489/3000:  16%|█▋        | 489/3000 [03:32<19:35,  2.14it/s, v_num=1, train_loss_step=3.02e+6, train_loss_epoch=2.87e+6]Epoch 490/3000:  16%|█▋        | 489/3000 [03:32<19:35,  2.14it/s, v_num=1, train_loss_step=3.02e+6, train_loss_epoch=2.87e+6]Epoch 490/3000:  16%|█▋        | 490/3000 [03:33<19:58,  2.09it/s, v_num=1, train_loss_step=3.02e+6, train_loss_epoch=2.87e+6]Epoch 490/3000:  16%|█▋        | 490/3000 [03:33<19:58,  2.09it/s, v_num=1, train_loss_step=3.15e+6, train_loss_epoch=2.87e+6]Epoch 491/3000:  16%|█▋        | 490/3000 [03:33<19:58,  2.09it/s, v_num=1, train_loss_step=3.15e+6, train_loss_epoch=2.87e+6]Epoch 491/3000:  16%|█▋        | 491/3000 [03:33<19:55,  2.10it/s, v_num=1, train_loss_step=3.15e+6, train_loss_epoch=2.87e+6]Epoch 491/3000:  16%|█▋        | 491/3000 [03:33<19:55,  2.10it/s, v_num=1, train_loss_step=2.88e+6, train_loss_epoch=2.86e+6]Epoch 492/3000:  16%|█▋        | 491/3000 [03:33<19:55,  2.10it/s, v_num=1, train_loss_step=2.88e+6, train_loss_epoch=2.86e+6]Epoch 492/3000:  16%|█▋        | 492/3000 [03:34<18:22,  2.27it/s, v_num=1, train_loss_step=2.88e+6, train_loss_epoch=2.86e+6]Epoch 492/3000:  16%|█▋        | 492/3000 [03:34<18:22,  2.27it/s, v_num=1, train_loss_step=2.81e+6, train_loss_epoch=2.86e+6]Epoch 493/3000:  16%|█▋        | 492/3000 [03:34<18:22,  2.27it/s, v_num=1, train_loss_step=2.81e+6, train_loss_epoch=2.86e+6]Epoch 493/3000:  16%|█▋        | 493/3000 
[03:34<17:40,  2.36it/s, v_num=1, train_loss_step=2.81e+6, train_loss_epoch=2.86e+6]Epoch 493/3000:  16%|█▋        | 493/3000 [03:34<17:40,  2.36it/s, v_num=1, train_loss_step=2.73e+6, train_loss_epoch=2.85e+6]Epoch 494/3000:  16%|█▋        | 493/3000 [03:34<17:40,  2.36it/s, v_num=1, train_loss_step=2.73e+6, train_loss_epoch=2.85e+6]Epoch 494/3000:  16%|█▋        | 494/3000 [03:35<18:18,  2.28it/s, v_num=1, train_loss_step=2.73e+6, train_loss_epoch=2.85e+6]Epoch 494/3000:  16%|█▋        | 494/3000 [03:35<18:18,  2.28it/s, v_num=1, train_loss_step=2.92e+6, train_loss_epoch=2.85e+6]Epoch 495/3000:  16%|█▋        | 494/3000 [03:35<18:18,  2.28it/s, v_num=1, train_loss_step=2.92e+6, train_loss_epoch=2.85e+6]Epoch 495/3000:  16%|█▋        | 495/3000 [03:35<18:05,  2.31it/s, v_num=1, train_loss_step=2.92e+6, train_loss_epoch=2.85e+6]Epoch 495/3000:  16%|█▋        | 495/3000 [03:35<18:05,  2.31it/s, v_num=1, train_loss_step=2.77e+6, train_loss_epoch=2.84e+6]Epoch 496/3000:  16%|█▋        | 495/3000 [03:35<18:05,  2.31it/s, v_num=1, train_loss_step=2.77e+6, train_loss_epoch=2.84e+6]Epoch 496/3000:  17%|█▋        | 496/3000 [03:35<17:56,  2.33it/s, v_num=1, train_loss_step=2.77e+6, train_loss_epoch=2.84e+6]Epoch 496/3000:  17%|█▋        | 496/3000 [03:35<17:56,  2.33it/s, v_num=1, train_loss_step=2.92e+6, train_loss_epoch=2.84e+6]Epoch 497/3000:  17%|█▋        | 496/3000 [03:35<17:56,  2.33it/s, v_num=1, train_loss_step=2.92e+6, train_loss_epoch=2.84e+6]Epoch 497/3000:  17%|█▋        | 497/3000 [03:36<18:51,  2.21it/s, v_num=1, train_loss_step=2.92e+6, train_loss_epoch=2.84e+6]Epoch 497/3000:  17%|█▋        | 497/3000 [03:36<18:51,  2.21it/s, v_num=1, train_loss_step=3.04e+6, train_loss_epoch=2.84e+6]Epoch 498/3000:  17%|█▋        | 497/3000 [03:36<18:51,  2.21it/s, v_num=1, train_loss_step=3.04e+6, train_loss_epoch=2.84e+6]Epoch 498/3000:  17%|█▋        | 498/3000 [03:36<18:35,  2.24it/s, v_num=1, train_loss_step=3.04e+6, train_loss_epoch=2.84e+6]Epoch 498/3000:  17%|█▋   
     | 498/3000 [03:36<18:35,  2.24it/s, v_num=1, train_loss_step=2.76e+6, train_loss_epoch=2.83e+6]Epoch 499/3000:  17%|█▋        | 498/3000 [03:36<18:35,  2.24it/s, v_num=1, train_loss_step=2.76e+6, train_loss_epoch=2.83e+6]Epoch 499/3000:  17%|█▋        | 499/3000 [03:37<18:10,  2.29it/s, v_num=1, train_loss_step=2.76e+6, train_loss_epoch=2.83e+6]Epoch 499/3000:  17%|█▋        | 499/3000 [03:37<18:10,  2.29it/s, v_num=1, train_loss_step=3.06e+6, train_loss_epoch=2.83e+6]Epoch 500/3000:  17%|█▋        | 499/3000 [03:37<18:10,  2.29it/s, v_num=1, train_loss_step=3.06e+6, train_loss_epoch=2.83e+6]Epoch 500/3000:  17%|█▋        | 500/3000 [03:37<17:21,  2.40it/s, v_num=1, train_loss_step=3.06e+6, train_loss_epoch=2.83e+6]Epoch 500/3000:  17%|█▋        | 500/3000 [03:37<17:21,  2.40it/s, v_num=1, train_loss_step=2.81e+6, train_loss_epoch=2.82e+6]Epoch 501/3000:  17%|█▋        | 500/3000 [03:37<17:21,  2.40it/s, v_num=1, train_loss_step=2.81e+6, train_loss_epoch=2.82e+6]Epoch 501/3000:  17%|█▋        | 501/3000 [03:38<17:48,  2.34it/s, v_num=1, train_loss_step=2.81e+6, train_loss_epoch=2.82e+6]Epoch 501/3000:  17%|█▋        | 501/3000 [03:38<17:48,  2.34it/s, v_num=1, train_loss_step=2.9e+6, train_loss_epoch=2.82e+6] Epoch 502/3000:  17%|█▋        | 501/3000 [03:38<17:48,  2.34it/s, v_num=1, train_loss_step=2.9e+6, train_loss_epoch=2.82e+6]Epoch 502/3000:  17%|█▋        | 502/3000 [03:38<18:14,  2.28it/s, v_num=1, train_loss_step=2.9e+6, train_loss_epoch=2.82e+6]Epoch 502/3000:  17%|█▋        | 502/3000 [03:38<18:14,  2.28it/s, v_num=1, train_loss_step=2.75e+6, train_loss_epoch=2.81e+6]Epoch 503/3000:  17%|█▋        | 502/3000 [03:38<18:14,  2.28it/s, v_num=1, train_loss_step=2.75e+6, train_loss_epoch=2.81e+6]Epoch 503/3000:  17%|█▋        | 503/3000 [03:38<18:12,  2.29it/s, v_num=1, train_loss_step=2.75e+6, train_loss_epoch=2.81e+6]Epoch 503/3000:  17%|█▋        | 503/3000 [03:38<18:12,  2.29it/s, v_num=1, train_loss_step=2.8e+6, train_loss_epoch=2.81e+6] Epoch 
504/3000:  17%|█▋        | 503/3000 [03:38<18:12,  2.29it/s, v_num=1, train_loss_step=2.8e+6, train_loss_epoch=2.81e+6]Epoch 504/3000:  17%|█▋        | 504/3000 [03:39<18:26,  2.26it/s, v_num=1, train_loss_step=2.8e+6, train_loss_epoch=2.81e+6]Epoch 504/3000:  17%|█▋        | 504/3000 [03:39<18:26,  2.26it/s, v_num=1, train_loss_step=2.94e+6, train_loss_epoch=2.8e+6]Epoch 505/3000:  17%|█▋        | 504/3000 [03:39<18:26,  2.26it/s, v_num=1, train_loss_step=2.94e+6, train_loss_epoch=2.8e+6]Epoch 505/3000:  17%|█▋        | 505/3000 [03:39<18:30,  2.25it/s, v_num=1, train_loss_step=2.94e+6, train_loss_epoch=2.8e+6]Epoch 505/3000:  17%|█▋        | 505/3000 [03:39<18:30,  2.25it/s, v_num=1, train_loss_step=2.69e+6, train_loss_epoch=2.8e+6]Epoch 506/3000:  17%|█▋        | 505/3000 [03:39<18:30,  2.25it/s, v_num=1, train_loss_step=2.69e+6, train_loss_epoch=2.8e+6]Epoch 506/3000:  17%|█▋        | 506/3000 [03:40<18:12,  2.28it/s, v_num=1, train_loss_step=2.69e+6, train_loss_epoch=2.8e+6]Epoch 506/3000:  17%|█▋        | 506/3000 [03:40<18:12,  2.28it/s, v_num=1, train_loss_step=2.83e+6, train_loss_epoch=2.8e+6]Epoch 507/3000:  17%|█▋        | 506/3000 [03:40<18:12,  2.28it/s, v_num=1, train_loss_step=2.83e+6, train_loss_epoch=2.8e+6]Epoch 507/3000:  17%|█▋        | 507/3000 [03:40<17:34,  2.37it/s, v_num=1, train_loss_step=2.83e+6, train_loss_epoch=2.8e+6]Epoch 507/3000:  17%|█▋        | 507/3000 [03:40<17:34,  2.37it/s, v_num=1, train_loss_step=2.81e+6, train_loss_epoch=2.79e+6]Epoch 508/3000:  17%|█▋        | 507/3000 [03:40<17:34,  2.37it/s, v_num=1, train_loss_step=2.81e+6, train_loss_epoch=2.79e+6]Epoch 508/3000:  17%|█▋        | 508/3000 [03:41<17:26,  2.38it/s, v_num=1, train_loss_step=2.81e+6, train_loss_epoch=2.79e+6]Epoch 508/3000:  17%|█▋        | 508/3000 [03:41<17:26,  2.38it/s, v_num=1, train_loss_step=2.99e+6, train_loss_epoch=2.79e+6]Epoch 509/3000:  17%|█▋        | 508/3000 [03:41<17:26,  2.38it/s, v_num=1, train_loss_step=2.99e+6, 
train_loss_epoch=2.79e+6]Epoch 509/3000:  17%|█▋        | 509/3000 [03:41<18:35,  2.23it/s, v_num=1, train_loss_step=2.99e+6, train_loss_epoch=2.79e+6]Epoch 509/3000:  17%|█▋        | 509/3000 [03:41<18:35,  2.23it/s, v_num=1, train_loss_step=2.59e+6, train_loss_epoch=2.78e+6]Epoch 510/3000:  17%|█▋        | 509/3000 [03:41<18:35,  2.23it/s, v_num=1, train_loss_step=2.59e+6, train_loss_epoch=2.78e+6]Epoch 510/3000:  17%|█▋        | 510/3000 [03:42<18:20,  2.26it/s, v_num=1, train_loss_step=2.59e+6, train_loss_epoch=2.78e+6]Epoch 510/3000:  17%|█▋        | 510/3000 [03:42<18:20,  2.26it/s, v_num=1, train_loss_step=2.69e+6, train_loss_epoch=2.78e+6]Epoch 511/3000:  17%|█▋        | 510/3000 [03:42<18:20,  2.26it/s, v_num=1, train_loss_step=2.69e+6, train_loss_epoch=2.78e+6]Epoch 511/3000:  17%|█▋        | 511/3000 [03:42<17:28,  2.37it/s, v_num=1, train_loss_step=2.69e+6, train_loss_epoch=2.78e+6]Epoch 511/3000:  17%|█▋        | 511/3000 [03:42<17:28,  2.37it/s, v_num=1, train_loss_step=2.76e+6, train_loss_epoch=2.77e+6]Epoch 512/3000:  17%|█▋        | 511/3000 [03:42<17:28,  2.37it/s, v_num=1, train_loss_step=2.76e+6, train_loss_epoch=2.77e+6]Epoch 512/3000:  17%|█▋        | 512/3000 [03:42<16:40,  2.49it/s, v_num=1, train_loss_step=2.76e+6, train_loss_epoch=2.77e+6]Epoch 512/3000:  17%|█▋        | 512/3000 [03:42<16:40,  2.49it/s, v_num=1, train_loss_step=2.84e+6, train_loss_epoch=2.77e+6]Epoch 513/3000:  17%|█▋        | 512/3000 [03:42<16:40,  2.49it/s, v_num=1, train_loss_step=2.84e+6, train_loss_epoch=2.77e+6]Epoch 513/3000:  17%|█▋        | 513/3000 [03:43<17:03,  2.43it/s, v_num=1, train_loss_step=2.84e+6, train_loss_epoch=2.77e+6]Epoch 513/3000:  17%|█▋        | 513/3000 [03:43<17:03,  2.43it/s, v_num=1, train_loss_step=2.81e+6, train_loss_epoch=2.77e+6]Epoch 514/3000:  17%|█▋        | 513/3000 [03:43<17:03,  2.43it/s, v_num=1, train_loss_step=2.81e+6, train_loss_epoch=2.77e+6]Epoch 514/3000:  17%|█▋        | 514/3000 [03:43<17:36,  2.35it/s, v_num=1, 
train_loss_step=2.81e+6, train_loss_epoch=2.77e+6]Epoch 514/3000:  17%|█▋        | 514/3000 [03:43<17:36,  2.35it/s, v_num=1, train_loss_step=2.97e+6, train_loss_epoch=2.76e+6]Epoch 515/3000:  17%|█▋        | 514/3000 [03:43<17:36,  2.35it/s, v_num=1, train_loss_step=2.97e+6, train_loss_epoch=2.76e+6]Epoch 515/3000:  17%|█▋        | 515/3000 [03:44<17:51,  2.32it/s, v_num=1, train_loss_step=2.97e+6, train_loss_epoch=2.76e+6]Epoch 515/3000:  17%|█▋        | 515/3000 [03:44<17:51,  2.32it/s, v_num=1, train_loss_step=2.62e+6, train_loss_epoch=2.76e+6]Epoch 516/3000:  17%|█▋        | 515/3000 [03:44<17:51,  2.32it/s, v_num=1, train_loss_step=2.62e+6, train_loss_epoch=2.76e+6]Epoch 516/3000:  17%|█▋        | 516/3000 [03:44<18:38,  2.22it/s, v_num=1, train_loss_step=2.62e+6, train_loss_epoch=2.76e+6]Epoch 516/3000:  17%|█▋        | 516/3000 [03:44<18:38,  2.22it/s, v_num=1, train_loss_step=2.61e+6, train_loss_epoch=2.75e+6]Epoch 517/3000:  17%|█▋        | 516/3000 [03:44<18:38,  2.22it/s, v_num=1, train_loss_step=2.61e+6, train_loss_epoch=2.75e+6]Epoch 517/3000:  17%|█▋        | 517/3000 [03:45<19:22,  2.14it/s, v_num=1, train_loss_step=2.61e+6, train_loss_epoch=2.75e+6]Epoch 517/3000:  17%|█▋        | 517/3000 [03:45<19:22,  2.14it/s, v_num=1, train_loss_step=2.78e+6, train_loss_epoch=2.75e+6]Epoch 518/3000:  17%|█▋        | 517/3000 [03:45<19:22,  2.14it/s, v_num=1, train_loss_step=2.78e+6, train_loss_epoch=2.75e+6]Epoch 518/3000:  17%|█▋        | 518/3000 [03:45<19:41,  2.10it/s, v_num=1, train_loss_step=2.78e+6, train_loss_epoch=2.75e+6]Epoch 518/3000:  17%|█▋        | 518/3000 [03:45<19:41,  2.10it/s, v_num=1, train_loss_step=2.63e+6, train_loss_epoch=2.75e+6]Epoch 519/3000:  17%|█▋        | 518/3000 [03:45<19:41,  2.10it/s, v_num=1, train_loss_step=2.63e+6, train_loss_epoch=2.75e+6]Epoch 519/3000:  17%|█▋        | 519/3000 [03:46<21:22,  1.93it/s, v_num=1, train_loss_step=2.63e+6, train_loss_epoch=2.75e+6]Epoch 519/3000:  17%|█▋        | 519/3000 [03:46<21:22,  
1.93it/s, v_num=1, train_loss_step=3.01e+6, train_loss_epoch=2.74e+6]Epoch 520/3000:  17%|█▋        | 519/3000 [03:46<21:22,  1.93it/s, v_num=1, train_loss_step=3.01e+6, train_loss_epoch=2.74e+6]Epoch 520/3000:  17%|█▋        | 520/3000 [03:46<19:50,  2.08it/s, v_num=1, train_loss_step=3.01e+6, train_loss_epoch=2.74e+6]Epoch 520/3000:  17%|█▋        | 520/3000 [03:46<19:50,  2.08it/s, v_num=1, train_loss_step=2.75e+6, train_loss_epoch=2.74e+6]Epoch 521/3000:  17%|█▋        | 520/3000 [03:46<19:50,  2.08it/s, v_num=1, train_loss_step=2.75e+6, train_loss_epoch=2.74e+6]Epoch 521/3000:  17%|█▋        | 521/3000 [03:47<19:01,  2.17it/s, v_num=1, train_loss_step=2.75e+6, train_loss_epoch=2.74e+6]Epoch 521/3000:  17%|█▋        | 521/3000 [03:47<19:01,  2.17it/s, v_num=1, train_loss_step=2.75e+6, train_loss_epoch=2.73e+6]Epoch 522/3000:  17%|█▋        | 521/3000 [03:47<19:01,  2.17it/s, v_num=1, train_loss_step=2.75e+6, train_loss_epoch=2.73e+6]Epoch 522/3000:  17%|█▋        | 522/3000 [03:47<17:18,  2.39it/s, v_num=1, train_loss_step=2.75e+6, train_loss_epoch=2.73e+6]Epoch 522/3000:  17%|█▋        | 522/3000 [03:47<17:18,  2.39it/s, v_num=1, train_loss_step=2.73e+6, train_loss_epoch=2.73e+6]Epoch 523/3000:  17%|█▋        | 522/3000 [03:47<17:18,  2.39it/s, v_num=1, train_loss_step=2.73e+6, train_loss_epoch=2.73e+6]Epoch 523/3000:  17%|█▋        | 523/3000 [03:47<17:19,  2.38it/s, v_num=1, train_loss_step=2.73e+6, train_loss_epoch=2.73e+6]Epoch 523/3000:  17%|█▋        | 523/3000 [03:47<17:19,  2.38it/s, v_num=1, train_loss_step=2.71e+6, train_loss_epoch=2.73e+6]Epoch 524/3000:  17%|█▋        | 523/3000 [03:47<17:19,  2.38it/s, v_num=1, train_loss_step=2.71e+6, train_loss_epoch=2.73e+6]Epoch 524/3000:  17%|█▋        | 524/3000 [03:48<17:07,  2.41it/s, v_num=1, train_loss_step=2.71e+6, train_loss_epoch=2.73e+6]Epoch 524/3000:  17%|█▋        | 524/3000 [03:48<17:07,  2.41it/s, v_num=1, train_loss_step=2.66e+6, train_loss_epoch=2.72e+6]Epoch 525/3000:  17%|█▋        | 
524/3000 [03:48<17:07,  2.41it/s, v_num=1, train_loss_step=2.66e+6, train_loss_epoch=2.72e+6]Epoch 525/3000:  18%|█▊        | 525/3000 [03:48<17:36,  2.34it/s, v_num=1, train_loss_step=2.66e+6, train_loss_epoch=2.72e+6]Epoch 525/3000:  18%|█▊        | 525/3000 [03:48<17:36,  2.34it/s, v_num=1, train_loss_step=2.97e+6, train_loss_epoch=2.72e+6]Epoch 526/3000:  18%|█▊        | 525/3000 [03:48<17:36,  2.34it/s, v_num=1, train_loss_step=2.97e+6, train_loss_epoch=2.72e+6]Epoch 526/3000:  18%|█▊        | 526/3000 [03:49<17:45,  2.32it/s, v_num=1, train_loss_step=2.97e+6, train_loss_epoch=2.72e+6]Epoch 526/3000:  18%|█▊        | 526/3000 [03:49<17:45,  2.32it/s, v_num=1, train_loss_step=2.77e+6, train_loss_epoch=2.71e+6]Epoch 527/3000:  18%|█▊        | 526/3000 [03:49<17:45,  2.32it/s, v_num=1, train_loss_step=2.77e+6, train_loss_epoch=2.71e+6]Epoch 527/3000:  18%|█▊        | 527/3000 [03:49<17:29,  2.36it/s, v_num=1, train_loss_step=2.77e+6, train_loss_epoch=2.71e+6]Epoch 527/3000:  18%|█▊        | 527/3000 [03:49<17:29,  2.36it/s, v_num=1, train_loss_step=2.75e+6, train_loss_epoch=2.71e+6]Epoch 528/3000:  18%|█▊        | 527/3000 [03:49<17:29,  2.36it/s, v_num=1, train_loss_step=2.75e+6, train_loss_epoch=2.71e+6]Epoch 528/3000:  18%|█▊        | 528/3000 [03:49<17:49,  2.31it/s, v_num=1, train_loss_step=2.75e+6, train_loss_epoch=2.71e+6]Epoch 528/3000:  18%|█▊        | 528/3000 [03:49<17:49,  2.31it/s, v_num=1, train_loss_step=2.83e+6, train_loss_epoch=2.71e+6]Epoch 529/3000:  18%|█▊        | 528/3000 [03:49<17:49,  2.31it/s, v_num=1, train_loss_step=2.83e+6, train_loss_epoch=2.71e+6]Epoch 529/3000:  18%|█▊        | 529/3000 [03:50<17:09,  2.40it/s, v_num=1, train_loss_step=2.83e+6, train_loss_epoch=2.71e+6]Epoch 529/3000:  18%|█▊        | 529/3000 [03:50<17:09,  2.40it/s, v_num=1, train_loss_step=2.72e+6, train_loss_epoch=2.7e+6] Epoch 530/3000:  18%|█▊        | 529/3000 [03:50<17:09,  2.40it/s, v_num=1, train_loss_step=2.72e+6, train_loss_epoch=2.7e+6]Epoch 530/3000:  
18%|█▊        | 530/3000 [03:50<17:29,  2.35it/s, v_num=1, train_loss_step=2.72e+6, train_loss_epoch=2.7e+6]Epoch 530/3000:  18%|█▊        | 530/3000 [03:50<17:29,  2.35it/s, v_num=1, train_loss_step=2.55e+6, train_loss_epoch=2.7e+6]Epoch 531/3000:  18%|█▊        | 530/3000 [03:50<17:29,  2.35it/s, v_num=1, train_loss_step=2.55e+6, train_loss_epoch=2.7e+6]Epoch 531/3000:  18%|█▊        | 531/3000 [03:51<17:22,  2.37it/s, v_num=1, train_loss_step=2.55e+6, train_loss_epoch=2.7e+6]Epoch 531/3000:  18%|█▊        | 531/3000 [03:51<17:22,  2.37it/s, v_num=1, train_loss_step=2.68e+6, train_loss_epoch=2.69e+6]Epoch 532/3000:  18%|█▊        | 531/3000 [03:51<17:22,  2.37it/s, v_num=1, train_loss_step=2.68e+6, train_loss_epoch=2.69e+6]Epoch 532/3000:  18%|█▊        | 532/3000 [03:51<17:33,  2.34it/s, v_num=1, train_loss_step=2.68e+6, train_loss_epoch=2.69e+6]Epoch 532/3000:  18%|█▊        | 532/3000 [03:51<17:33,  2.34it/s, v_num=1, train_loss_step=2.78e+6, train_loss_epoch=2.69e+6]Epoch 533/3000:  18%|█▊        | 532/3000 [03:51<17:33,  2.34it/s, v_num=1, train_loss_step=2.78e+6, train_loss_epoch=2.69e+6]Epoch 533/3000:  18%|█▊        | 533/3000 [03:52<17:30,  2.35it/s, v_num=1, train_loss_step=2.78e+6, train_loss_epoch=2.69e+6]Epoch 533/3000:  18%|█▊        | 533/3000 [03:52<17:30,  2.35it/s, v_num=1, train_loss_step=2.86e+6, train_loss_epoch=2.69e+6]Epoch 534/3000:  18%|█▊        | 533/3000 [03:52<17:30,  2.35it/s, v_num=1, train_loss_step=2.86e+6, train_loss_epoch=2.69e+6]Epoch 534/3000:  18%|█▊        | 534/3000 [03:52<17:44,  2.32it/s, v_num=1, train_loss_step=2.86e+6, train_loss_epoch=2.69e+6]Epoch 534/3000:  18%|█▊        | 534/3000 [03:52<17:44,  2.32it/s, v_num=1, train_loss_step=2.76e+6, train_loss_epoch=2.68e+6]Epoch 535/3000:  18%|█▊        | 534/3000 [03:52<17:44,  2.32it/s, v_num=1, train_loss_step=2.76e+6, train_loss_epoch=2.68e+6]Epoch 535/3000:  18%|█▊        | 535/3000 [03:52<17:41,  2.32it/s, v_num=1, train_loss_step=2.76e+6, 
train_loss_epoch=2.68e+6]Epoch 535/3000:  18%|█▊        | 535/3000 [03:52<17:41,  2.32it/s, v_num=1, train_loss_step=2.72e+6, train_loss_epoch=2.68e+6]Epoch 536/3000:  18%|█▊        | 535/3000 [03:52<17:41,  2.32it/s, v_num=1, train_loss_step=2.72e+6, train_loss_epoch=2.68e+6]Epoch 536/3000:  18%|█▊        | 536/3000 [03:53<18:13,  2.25it/s, v_num=1, train_loss_step=2.72e+6, train_loss_epoch=2.68e+6]Epoch 536/3000:  18%|█▊        | 536/3000 [03:53<18:13,  2.25it/s, v_num=1, train_loss_step=2.7e+6, train_loss_epoch=2.68e+6] Epoch 537/3000:  18%|█▊        | 536/3000 [03:53<18:13,  2.25it/s, v_num=1, train_loss_step=2.7e+6, train_loss_epoch=2.68e+6]Epoch 537/3000:  18%|█▊        | 537/3000 [03:53<18:28,  2.22it/s, v_num=1, train_loss_step=2.7e+6, train_loss_epoch=2.68e+6]Epoch 537/3000:  18%|█▊        | 537/3000 [03:53<18:28,  2.22it/s, v_num=1, train_loss_step=2.57e+6, train_loss_epoch=2.67e+6]Epoch 538/3000:  18%|█▊        | 537/3000 [03:53<18:28,  2.22it/s, v_num=1, train_loss_step=2.57e+6, train_loss_epoch=2.67e+6]Epoch 538/3000:  18%|█▊        | 538/3000 [03:54<19:06,  2.15it/s, v_num=1, train_loss_step=2.57e+6, train_loss_epoch=2.67e+6]Epoch 538/3000:  18%|█▊        | 538/3000 [03:54<19:06,  2.15it/s, v_num=1, train_loss_step=2.72e+6, train_loss_epoch=2.67e+6]Epoch 539/3000:  18%|█▊        | 538/3000 [03:54<19:06,  2.15it/s, v_num=1, train_loss_step=2.72e+6, train_loss_epoch=2.67e+6]Epoch 539/3000:  18%|█▊        | 539/3000 [03:54<19:08,  2.14it/s, v_num=1, train_loss_step=2.72e+6, train_loss_epoch=2.67e+6]Epoch 539/3000:  18%|█▊        | 539/3000 [03:54<19:08,  2.14it/s, v_num=1, train_loss_step=2.64e+6, train_loss_epoch=2.66e+6]Epoch 540/3000:  18%|█▊        | 539/3000 [03:54<19:08,  2.14it/s, v_num=1, train_loss_step=2.64e+6, train_loss_epoch=2.66e+6]Epoch 540/3000:  18%|█▊        | 540/3000 [03:55<19:15,  2.13it/s, v_num=1, train_loss_step=2.64e+6, train_loss_epoch=2.66e+6]Epoch 540/3000:  18%|█▊        | 540/3000 [03:55<19:15,  2.13it/s, v_num=1, 
train_loss_step=2.77e+6, train_loss_epoch=2.66e+6]Epoch 541/3000:  18%|█▊        | 540/3000 [03:55<19:15,  2.13it/s, v_num=1, train_loss_step=2.77e+6, train_loss_epoch=2.66e+6]Epoch 541/3000:  18%|█▊        | 541/3000 [03:55<18:35,  2.20it/s, v_num=1, train_loss_step=2.77e+6, train_loss_epoch=2.66e+6]Epoch 541/3000:  18%|█▊        | 541/3000 [03:55<18:35,  2.20it/s, v_num=1, train_loss_step=2.54e+6, train_loss_epoch=2.66e+6]Epoch 542/3000:  18%|█▊        | 541/3000 [03:55<18:35,  2.20it/s, v_num=1, train_loss_step=2.54e+6, train_loss_epoch=2.66e+6]Epoch 542/3000:  18%|█▊        | 542/3000 [03:56<18:55,  2.17it/s, v_num=1, train_loss_step=2.54e+6, train_loss_epoch=2.66e+6]Epoch 542/3000:  18%|█▊        | 542/3000 [03:56<18:55,  2.17it/s, v_num=1, train_loss_step=2.67e+6, train_loss_epoch=2.65e+6]Epoch 543/3000:  18%|█▊        | 542/3000 [03:56<18:55,  2.17it/s, v_num=1, train_loss_step=2.67e+6, train_loss_epoch=2.65e+6]Epoch 543/3000:  18%|█▊        | 543/3000 [03:56<18:24,  2.22it/s, v_num=1, train_loss_step=2.67e+6, train_loss_epoch=2.65e+6]Epoch 543/3000:  18%|█▊        | 543/3000 [03:56<18:24,  2.22it/s, v_num=1, train_loss_step=2.76e+6, train_loss_epoch=2.65e+6]Epoch 544/3000:  18%|█▊        | 543/3000 [03:56<18:24,  2.22it/s, v_num=1, train_loss_step=2.76e+6, train_loss_epoch=2.65e+6]Epoch 544/3000:  18%|█▊        | 544/3000 [03:57<18:10,  2.25it/s, v_num=1, train_loss_step=2.76e+6, train_loss_epoch=2.65e+6]Epoch 544/3000:  18%|█▊        | 544/3000 [03:57<18:10,  2.25it/s, v_num=1, train_loss_step=2.74e+6, train_loss_epoch=2.65e+6]Epoch 545/3000:  18%|█▊        | 544/3000 [03:57<18:10,  2.25it/s, v_num=1, train_loss_step=2.74e+6, train_loss_epoch=2.65e+6]Epoch 545/3000:  18%|█▊        | 545/3000 [03:57<18:44,  2.18it/s, v_num=1, train_loss_step=2.74e+6, train_loss_epoch=2.65e+6]Epoch 545/3000:  18%|█▊        | 545/3000 [03:57<18:44,  2.18it/s, v_num=1, train_loss_step=2.53e+6, train_loss_epoch=2.64e+6]Epoch 546/3000:  18%|█▊        | 545/3000 [03:57<18:44,  
2.18it/s, v_num=1, train_loss_step=2.53e+6, train_loss_epoch=2.64e+6]Epoch 546/3000:  18%|█▊        | 546/3000 [03:57<18:21,  2.23it/s, v_num=1, train_loss_step=2.53e+6, train_loss_epoch=2.64e+6]Epoch 546/3000:  18%|█▊        | 546/3000 [03:57<18:21,  2.23it/s, v_num=1, train_loss_step=2.94e+6, train_loss_epoch=2.64e+6]Epoch 547/3000:  18%|█▊        | 546/3000 [03:57<18:21,  2.23it/s, v_num=1, train_loss_step=2.94e+6, train_loss_epoch=2.64e+6]Epoch 547/3000:  18%|█▊        | 547/3000 [03:58<18:23,  2.22it/s, v_num=1, train_loss_step=2.94e+6, train_loss_epoch=2.64e+6]Epoch 547/3000:  18%|█▊        | 547/3000 [03:58<18:23,  2.22it/s, v_num=1, train_loss_step=2.49e+6, train_loss_epoch=2.63e+6]Epoch 548/3000:  18%|█▊        | 547/3000 [03:58<18:23,  2.22it/s, v_num=1, train_loss_step=2.49e+6, train_loss_epoch=2.63e+6]Epoch 548/3000:  18%|█▊        | 548/3000 [03:58<18:22,  2.22it/s, v_num=1, train_loss_step=2.49e+6, train_loss_epoch=2.63e+6]Epoch 548/3000:  18%|█▊        | 548/3000 [03:58<18:22,  2.22it/s, v_num=1, train_loss_step=2.72e+6, train_loss_epoch=2.63e+6]Epoch 549/3000:  18%|█▊        | 548/3000 [03:58<18:22,  2.22it/s, v_num=1, train_loss_step=2.72e+6, train_loss_epoch=2.63e+6]Epoch 549/3000:  18%|█▊        | 549/3000 [03:59<18:24,  2.22it/s, v_num=1, train_loss_step=2.72e+6, train_loss_epoch=2.63e+6]Epoch 549/3000:  18%|█▊        | 549/3000 [03:59<18:24,  2.22it/s, v_num=1, train_loss_step=2.67e+6, train_loss_epoch=2.63e+6]Epoch 550/3000:  18%|█▊        | 549/3000 [03:59<18:24,  2.22it/s, v_num=1, train_loss_step=2.67e+6, train_loss_epoch=2.63e+6]Epoch 550/3000:  18%|█▊        | 550/3000 [03:59<17:22,  2.35it/s, v_num=1, train_loss_step=2.67e+6, train_loss_epoch=2.63e+6]Epoch 550/3000:  18%|█▊        | 550/3000 [03:59<17:22,  2.35it/s, v_num=1, train_loss_step=2.54e+6, train_loss_epoch=2.62e+6]Epoch 551/3000:  18%|█▊        | 550/3000 [03:59<17:22,  2.35it/s, v_num=1, train_loss_step=2.54e+6, train_loss_epoch=2.62e+6]Epoch 551/3000:  18%|█▊        | 
551/3000 [04:00<17:34,  2.32it/s, v_num=1, train_loss_step=2.54e+6, train_loss_epoch=2.62e+6]Epoch 551/3000:  18%|█▊        | 551/3000 [04:00<17:34,  2.32it/s, v_num=1, train_loss_step=2.69e+6, train_loss_epoch=2.62e+6]Epoch 552/3000:  18%|█▊        | 551/3000 [04:00<17:34,  2.32it/s, v_num=1, train_loss_step=2.69e+6, train_loss_epoch=2.62e+6]Epoch 552/3000:  18%|█▊        | 552/3000 [04:00<17:45,  2.30it/s, v_num=1, train_loss_step=2.69e+6, train_loss_epoch=2.62e+6]Epoch 552/3000:  18%|█▊        | 552/3000 [04:00<17:45,  2.30it/s, v_num=1, train_loss_step=2.44e+6, train_loss_epoch=2.62e+6]Epoch 553/3000:  18%|█▊        | 552/3000 [04:00<17:45,  2.30it/s, v_num=1, train_loss_step=2.44e+6, train_loss_epoch=2.62e+6]Epoch 553/3000:  18%|█▊        | 553/3000 [04:00<17:25,  2.34it/s, v_num=1, train_loss_step=2.44e+6, train_loss_epoch=2.62e+6]Epoch 553/3000:  18%|█▊        | 553/3000 [04:00<17:25,  2.34it/s, v_num=1, train_loss_step=2.65e+6, train_loss_epoch=2.61e+6]Epoch 554/3000:  18%|█▊        | 553/3000 [04:00<17:25,  2.34it/s, v_num=1, train_loss_step=2.65e+6, train_loss_epoch=2.61e+6]Epoch 554/3000:  18%|█▊        | 554/3000 [04:01<17:12,  2.37it/s, v_num=1, train_loss_step=2.65e+6, train_loss_epoch=2.61e+6]Epoch 554/3000:  18%|█▊        | 554/3000 [04:01<17:12,  2.37it/s, v_num=1, train_loss_step=2.77e+6, train_loss_epoch=2.61e+6]Epoch 555/3000:  18%|█▊        | 554/3000 [04:01<17:12,  2.37it/s, v_num=1, train_loss_step=2.77e+6, train_loss_epoch=2.61e+6]Epoch 555/3000:  18%|█▊        | 555/3000 [04:01<16:51,  2.42it/s, v_num=1, train_loss_step=2.77e+6, train_loss_epoch=2.61e+6]Epoch 555/3000:  18%|█▊        | 555/3000 [04:01<16:51,  2.42it/s, v_num=1, train_loss_step=2.54e+6, train_loss_epoch=2.61e+6]Epoch 556/3000:  18%|█▊        | 555/3000 [04:01<16:51,  2.42it/s, v_num=1, train_loss_step=2.54e+6, train_loss_epoch=2.61e+6]Epoch 556/3000:  19%|█▊        | 556/3000 [04:02<17:09,  2.37it/s, v_num=1, train_loss_step=2.54e+6, train_loss_epoch=2.61e+6]Epoch 556/3000:  
19%|█▊        | 556/3000 [04:02<17:09,  2.37it/s, v_num=1, train_loss_step=2.59e+6, train_loss_epoch=2.6e+6] Epoch 557/3000:  19%|█▊        | 556/3000 [04:02<17:09,  2.37it/s, v_num=1, train_loss_step=2.59e+6, train_loss_epoch=2.6e+6]Epoch 557/3000:  19%|█▊        | 557/3000 [04:02<17:35,  2.31it/s, v_num=1, train_loss_step=2.59e+6, train_loss_epoch=2.6e+6]Epoch 557/3000:  19%|█▊        | 557/3000 [04:02<17:35,  2.31it/s, v_num=1, train_loss_step=2.87e+6, train_loss_epoch=2.6e+6]Epoch 558/3000:  19%|█▊        | 557/3000 [04:02<17:35,  2.31it/s, v_num=1, train_loss_step=2.87e+6, train_loss_epoch=2.6e+6]Epoch 558/3000:  19%|█▊        | 558/3000 [04:03<17:31,  2.32it/s, v_num=1, train_loss_step=2.87e+6, train_loss_epoch=2.6e+6]Epoch 558/3000:  19%|█▊        | 558/3000 [04:03<17:31,  2.32it/s, v_num=1, train_loss_step=2.54e+6, train_loss_epoch=2.6e+6]Epoch 559/3000:  19%|█▊        | 558/3000 [04:03<17:31,  2.32it/s, v_num=1, train_loss_step=2.54e+6, train_loss_epoch=2.6e+6]Epoch 559/3000:  19%|█▊        | 559/3000 [04:03<17:51,  2.28it/s, v_num=1, train_loss_step=2.54e+6, train_loss_epoch=2.6e+6]Epoch 559/3000:  19%|█▊        | 559/3000 [04:03<17:51,  2.28it/s, v_num=1, train_loss_step=2.55e+6, train_loss_epoch=2.59e+6]Epoch 560/3000:  19%|█▊        | 559/3000 [04:03<17:51,  2.28it/s, v_num=1, train_loss_step=2.55e+6, train_loss_epoch=2.59e+6]Epoch 560/3000:  19%|█▊        | 560/3000 [04:04<18:01,  2.26it/s, v_num=1, train_loss_step=2.55e+6, train_loss_epoch=2.59e+6]Epoch 560/3000:  19%|█▊        | 560/3000 [04:04<18:01,  2.26it/s, v_num=1, train_loss_step=2.71e+6, train_loss_epoch=2.59e+6]Epoch 561/3000:  19%|█▊        | 560/3000 [04:04<18:01,  2.26it/s, v_num=1, train_loss_step=2.71e+6, train_loss_epoch=2.59e+6]Epoch 561/3000:  19%|█▊        | 561/3000 [04:04<19:00,  2.14it/s, v_num=1, train_loss_step=2.71e+6, train_loss_epoch=2.59e+6]Epoch 561/3000:  19%|█▊        | 561/3000 [04:04<19:00,  2.14it/s, v_num=1, train_loss_step=2.7e+6, train_loss_epoch=2.59e+6] Epoch 
562/3000:  19%|█▊        | 561/3000 [04:04<19:00,  2.14it/s, v_num=1, train_loss_step=2.7e+6, train_loss_epoch=2.59e+6]Epoch 562/3000:  19%|█▊        | 562/3000 [04:04<18:12,  2.23it/s, v_num=1, train_loss_step=2.7e+6, train_loss_epoch=2.59e+6]Epoch 562/3000:  19%|█▊        | 562/3000 [04:04<18:12,  2.23it/s, v_num=1, train_loss_step=2.63e+6, train_loss_epoch=2.58e+6]Epoch 563/3000:  19%|█▊        | 562/3000 [04:04<18:12,  2.23it/s, v_num=1, train_loss_step=2.63e+6, train_loss_epoch=2.58e+6]Epoch 563/3000:  19%|█▉        | 563/3000 [04:05<17:26,  2.33it/s, v_num=1, train_loss_step=2.63e+6, train_loss_epoch=2.58e+6]Epoch 563/3000:  19%|█▉        | 563/3000 [04:05<17:26,  2.33it/s, v_num=1, train_loss_step=2.51e+6, train_loss_epoch=2.58e+6]Epoch 564/3000:  19%|█▉        | 563/3000 [04:05<17:26,  2.33it/s, v_num=1, train_loss_step=2.51e+6, train_loss_epoch=2.58e+6]Epoch 564/3000:  19%|█▉        | 564/3000 [04:05<17:50,  2.28it/s, v_num=1, train_loss_step=2.51e+6, train_loss_epoch=2.58e+6]Epoch 564/3000:  19%|█▉        | 564/3000 [04:05<17:50,  2.28it/s, v_num=1, train_loss_step=2.45e+6, train_loss_epoch=2.58e+6]Epoch 565/3000:  19%|█▉        | 564/3000 [04:05<17:50,  2.28it/s, v_num=1, train_loss_step=2.45e+6, train_loss_epoch=2.58e+6]Epoch 565/3000:  19%|█▉        | 565/3000 [04:06<18:03,  2.25it/s, v_num=1, train_loss_step=2.45e+6, train_loss_epoch=2.58e+6]Epoch 565/3000:  19%|█▉        | 565/3000 [04:06<18:03,  2.25it/s, v_num=1, train_loss_step=2.65e+6, train_loss_epoch=2.57e+6]Epoch 566/3000:  19%|█▉        | 565/3000 [04:06<18:03,  2.25it/s, v_num=1, train_loss_step=2.65e+6, train_loss_epoch=2.57e+6]Epoch 566/3000:  19%|█▉        | 566/3000 [04:06<18:03,  2.25it/s, v_num=1, train_loss_step=2.65e+6, train_loss_epoch=2.57e+6]Epoch 566/3000:  19%|█▉        | 566/3000 [04:06<18:03,  2.25it/s, v_num=1, train_loss_step=2.61e+6, train_loss_epoch=2.57e+6]Epoch 567/3000:  19%|█▉        | 566/3000 [04:06<18:03,  2.25it/s, v_num=1, train_loss_step=2.61e+6, 
train_loss_epoch=2.57e+6]Epoch 567/3000:  19%|█▉        | 567/3000 [04:07<18:21,  2.21it/s, v_num=1, train_loss_step=2.61e+6, train_loss_epoch=2.57e+6]Epoch 567/3000:  19%|█▉        | 567/3000 [04:07<18:21,  2.21it/s, v_num=1, train_loss_step=2.46e+6, train_loss_epoch=2.57e+6]Epoch 568/3000:  19%|█▉        | 567/3000 [04:07<18:21,  2.21it/s, v_num=1, train_loss_step=2.46e+6, train_loss_epoch=2.57e+6]Epoch 568/3000:  19%|█▉        | 568/3000 [04:07<18:54,  2.14it/s, v_num=1, train_loss_step=2.46e+6, train_loss_epoch=2.57e+6]Epoch 568/3000:  19%|█▉        | 568/3000 [04:07<18:54,  2.14it/s, v_num=1, train_loss_step=2.6e+6, train_loss_epoch=2.56e+6] Epoch 569/3000:  19%|█▉        | 568/3000 [04:07<18:54,  2.14it/s, v_num=1, train_loss_step=2.6e+6, train_loss_epoch=2.56e+6]Epoch 569/3000:  19%|█▉        | 569/3000 [04:08<19:17,  2.10it/s, v_num=1, train_loss_step=2.6e+6, train_loss_epoch=2.56e+6]Epoch 569/3000:  19%|█▉        | 569/3000 [04:08<19:17,  2.10it/s, v_num=1, train_loss_step=2.4e+6, train_loss_epoch=2.56e+6]Epoch 570/3000:  19%|█▉        | 569/3000 [04:08<19:17,  2.10it/s, v_num=1, train_loss_step=2.4e+6, train_loss_epoch=2.56e+6]Epoch 570/3000:  19%|█▉        | 570/3000 [04:08<19:14,  2.11it/s, v_num=1, train_loss_step=2.4e+6, train_loss_epoch=2.56e+6]Epoch 570/3000:  19%|█▉        | 570/3000 [04:08<19:14,  2.11it/s, v_num=1, train_loss_step=2.56e+6, train_loss_epoch=2.56e+6]Epoch 571/3000:  19%|█▉        | 570/3000 [04:08<19:14,  2.11it/s, v_num=1, train_loss_step=2.56e+6, train_loss_epoch=2.56e+6]Epoch 571/3000:  19%|█▉        | 571/3000 [04:09<19:00,  2.13it/s, v_num=1, train_loss_step=2.56e+6, train_loss_epoch=2.56e+6]Epoch 571/3000:  19%|█▉        | 571/3000 [04:09<19:00,  2.13it/s, v_num=1, train_loss_step=2.46e+6, train_loss_epoch=2.55e+6]Epoch 572/3000:  19%|█▉        | 571/3000 [04:09<19:00,  2.13it/s, v_num=1, train_loss_step=2.46e+6, train_loss_epoch=2.55e+6]Epoch 572/3000:  19%|█▉        | 572/3000 [04:09<17:45,  2.28it/s, v_num=1, 
train_loss_step=2.46e+6, train_loss_epoch=2.55e+6]Epoch 572/3000:  19%|█▉        | 572/3000 [04:09<17:45,  2.28it/s, v_num=1, train_loss_step=2.69e+6, train_loss_epoch=2.55e+6]Epoch 573/3000:  19%|█▉        | 572/3000 [04:09<17:45,  2.28it/s, v_num=1, train_loss_step=2.69e+6, train_loss_epoch=2.55e+6]Epoch 573/3000:  19%|█▉        | 573/3000 [04:09<17:34,  2.30it/s, v_num=1, train_loss_step=2.69e+6, train_loss_epoch=2.55e+6]Epoch 573/3000:  19%|█▉        | 573/3000 [04:09<17:34,  2.30it/s, v_num=1, train_loss_step=2.64e+6, train_loss_epoch=2.55e+6]Epoch 574/3000:  19%|█▉        | 573/3000 [04:09<17:34,  2.30it/s, v_num=1, train_loss_step=2.64e+6, train_loss_epoch=2.55e+6]Epoch 574/3000:  19%|█▉        | 574/3000 [04:10<17:00,  2.38it/s, v_num=1, train_loss_step=2.64e+6, train_loss_epoch=2.55e+6]Epoch 574/3000:  19%|█▉        | 574/3000 [04:10<17:00,  2.38it/s, v_num=1, train_loss_step=2.5e+6, train_loss_epoch=2.54e+6] Epoch 575/3000:  19%|█▉        | 574/3000 [04:10<17:00,  2.38it/s, v_num=1, train_loss_step=2.5e+6, train_loss_epoch=2.54e+6]Epoch 575/3000:  19%|█▉        | 575/3000 [04:10<16:27,  2.46it/s, v_num=1, train_loss_step=2.5e+6, train_loss_epoch=2.54e+6]Epoch 575/3000:  19%|█▉        | 575/3000 [04:10<16:27,  2.46it/s, v_num=1, train_loss_step=2.53e+6, train_loss_epoch=2.54e+6]Epoch 576/3000:  19%|█▉        | 575/3000 [04:10<16:27,  2.46it/s, v_num=1, train_loss_step=2.53e+6, train_loss_epoch=2.54e+6]Epoch 576/3000:  19%|█▉        | 576/3000 [04:11<16:41,  2.42it/s, v_num=1, train_loss_step=2.53e+6, train_loss_epoch=2.54e+6]Epoch 576/3000:  19%|█▉        | 576/3000 [04:11<16:41,  2.42it/s, v_num=1, train_loss_step=2.54e+6, train_loss_epoch=2.54e+6]Epoch 577/3000:  19%|█▉        | 576/3000 [04:11<16:41,  2.42it/s, v_num=1, train_loss_step=2.54e+6, train_loss_epoch=2.54e+6]Epoch 577/3000:  19%|█▉        | 577/3000 [04:11<17:30,  2.31it/s, v_num=1, train_loss_step=2.54e+6, train_loss_epoch=2.54e+6]Epoch 577/3000:  19%|█▉        | 577/3000 [04:11<17:30,  
2.31it/s, v_num=1, train_loss_step=2.62e+6, train_loss_epoch=2.53e+6]Epoch 578/3000:  19%|█▉        | 577/3000 [04:11<17:30,  2.31it/s, v_num=1, train_loss_step=2.62e+6, train_loss_epoch=2.53e+6]Epoch 578/3000:  19%|█▉        | 578/3000 [04:12<17:57,  2.25it/s, v_num=1, train_loss_step=2.62e+6, train_loss_epoch=2.53e+6]Epoch 578/3000:  19%|█▉        | 578/3000 [04:12<17:57,  2.25it/s, v_num=1, train_loss_step=2.41e+6, train_loss_epoch=2.53e+6]Epoch 579/3000:  19%|█▉        | 578/3000 [04:12<17:57,  2.25it/s, v_num=1, train_loss_step=2.41e+6, train_loss_epoch=2.53e+6]Epoch 579/3000:  19%|█▉        | 579/3000 [04:12<18:40,  2.16it/s, v_num=1, train_loss_step=2.41e+6, train_loss_epoch=2.53e+6]Epoch 579/3000:  19%|█▉        | 579/3000 [04:12<18:40,  2.16it/s, v_num=1, train_loss_step=2.52e+6, train_loss_epoch=2.53e+6]Epoch 580/3000:  19%|█▉        | 579/3000 [04:12<18:40,  2.16it/s, v_num=1, train_loss_step=2.52e+6, train_loss_epoch=2.53e+6]Epoch 580/3000:  19%|█▉        | 580/3000 [04:12<18:07,  2.22it/s, v_num=1, train_loss_step=2.52e+6, train_loss_epoch=2.53e+6]Epoch 580/3000:  19%|█▉        | 580/3000 [04:12<18:07,  2.22it/s, v_num=1, train_loss_step=2.48e+6, train_loss_epoch=2.52e+6]Epoch 581/3000:  19%|█▉        | 580/3000 [04:12<18:07,  2.22it/s, v_num=1, train_loss_step=2.48e+6, train_loss_epoch=2.52e+6]Epoch 581/3000:  19%|█▉        | 581/3000 [04:13<17:48,  2.26it/s, v_num=1, train_loss_step=2.48e+6, train_loss_epoch=2.52e+6]Epoch 581/3000:  19%|█▉        | 581/3000 [04:13<17:48,  2.26it/s, v_num=1, train_loss_step=2.55e+6, train_loss_epoch=2.52e+6]Epoch 582/3000:  19%|█▉        | 581/3000 [04:13<17:48,  2.26it/s, v_num=1, train_loss_step=2.55e+6, train_loss_epoch=2.52e+6]Epoch 582/3000:  19%|█▉        | 582/3000 [04:13<17:21,  2.32it/s, v_num=1, train_loss_step=2.55e+6, train_loss_epoch=2.52e+6]Epoch 582/3000:  19%|█▉        | 582/3000 [04:13<17:21,  2.32it/s, v_num=1, train_loss_step=2.57e+6, train_loss_epoch=2.52e+6]Epoch 583/3000:  19%|█▉        | 
582/3000 [04:13<17:21,  2.32it/s, v_num=1, train_loss_step=2.57e+6, train_loss_epoch=2.52e+6]Epoch 583/3000:  19%|█▉        | 583/3000 [04:14<17:56,  2.25it/s, v_num=1, train_loss_step=2.57e+6, train_loss_epoch=2.52e+6]Epoch 583/3000:  19%|█▉        | 583/3000 [04:14<17:56,  2.25it/s, v_num=1, train_loss_step=2.6e+6, train_loss_epoch=2.51e+6] Epoch 584/3000:  19%|█▉        | 583/3000 [04:14<17:56,  2.25it/s, v_num=1, train_loss_step=2.6e+6, train_loss_epoch=2.51e+6]Epoch 584/3000:  19%|█▉        | 584/3000 [04:14<17:25,  2.31it/s, v_num=1, train_loss_step=2.6e+6, train_loss_epoch=2.51e+6]Epoch 584/3000:  19%|█▉        | 584/3000 [04:14<17:25,  2.31it/s, v_num=1, train_loss_step=2.48e+6, train_loss_epoch=2.51e+6]Epoch 585/3000:  19%|█▉        | 584/3000 [04:14<17:25,  2.31it/s, v_num=1, train_loss_step=2.48e+6, train_loss_epoch=2.51e+6]Epoch 585/3000:  20%|█▉        | 585/3000 [04:15<17:39,  2.28it/s, v_num=1, train_loss_step=2.48e+6, train_loss_epoch=2.51e+6]Epoch 585/3000:  20%|█▉        | 585/3000 [04:15<17:39,  2.28it/s, v_num=1, train_loss_step=2.53e+6, train_loss_epoch=2.51e+6]Epoch 586/3000:  20%|█▉        | 585/3000 [04:15<17:39,  2.28it/s, v_num=1, train_loss_step=2.53e+6, train_loss_epoch=2.51e+6]Epoch 586/3000:  20%|█▉        | 586/3000 [04:15<17:38,  2.28it/s, v_num=1, train_loss_step=2.53e+6, train_loss_epoch=2.51e+6]Epoch 586/3000:  20%|█▉        | 586/3000 [04:15<17:38,  2.28it/s, v_num=1, train_loss_step=2.3e+6, train_loss_epoch=2.51e+6] Epoch 587/3000:  20%|█▉        | 586/3000 [04:15<17:38,  2.28it/s, v_num=1, train_loss_step=2.3e+6, train_loss_epoch=2.51e+6]Epoch 587/3000:  20%|█▉        | 587/3000 [04:16<18:14,  2.21it/s, v_num=1, train_loss_step=2.3e+6, train_loss_epoch=2.51e+6]Epoch 587/3000:  20%|█▉        | 587/3000 [04:16<18:14,  2.21it/s, v_num=1, train_loss_step=2.58e+6, train_loss_epoch=2.5e+6]Epoch 588/3000:  20%|█▉        | 587/3000 [04:16<18:14,  2.21it/s, v_num=1, train_loss_step=2.58e+6, train_loss_epoch=2.5e+6]Epoch 588/3000:  
20%|█▉        | 588/3000 [04:16<17:42,  2.27it/s, v_num=1, train_loss_step=2.58e+6, train_loss_epoch=2.5e+6]Epoch 588/3000:  20%|█▉        | 588/3000 [04:16<17:42,  2.27it/s, v_num=1, train_loss_step=2.67e+6, train_loss_epoch=2.5e+6]Epoch 589/3000:  20%|█▉        | 588/3000 [04:16<17:42,  2.27it/s, v_num=1, train_loss_step=2.67e+6, train_loss_epoch=2.5e+6]Epoch 589/3000:  20%|█▉        | 589/3000 [04:16<18:01,  2.23it/s, v_num=1, train_loss_step=2.67e+6, train_loss_epoch=2.5e+6]Epoch 589/3000:  20%|█▉        | 589/3000 [04:16<18:01,  2.23it/s, v_num=1, train_loss_step=2.58e+6, train_loss_epoch=2.5e+6]Epoch 590/3000:  20%|█▉        | 589/3000 [04:16<18:01,  2.23it/s, v_num=1, train_loss_step=2.58e+6, train_loss_epoch=2.5e+6]Epoch 590/3000:  20%|█▉        | 590/3000 [04:17<17:46,  2.26it/s, v_num=1, train_loss_step=2.58e+6, train_loss_epoch=2.5e+6]Epoch 590/3000:  20%|█▉        | 590/3000 [04:17<17:46,  2.26it/s, v_num=1, train_loss_step=2.52e+6, train_loss_epoch=2.49e+6]Epoch 591/3000:  20%|█▉        | 590/3000 [04:17<17:46,  2.26it/s, v_num=1, train_loss_step=2.52e+6, train_loss_epoch=2.49e+6]Epoch 591/3000:  20%|█▉        | 591/3000 [04:17<17:40,  2.27it/s, v_num=1, train_loss_step=2.52e+6, train_loss_epoch=2.49e+6]Epoch 591/3000:  20%|█▉        | 591/3000 [04:17<17:40,  2.27it/s, v_num=1, train_loss_step=2.45e+6, train_loss_epoch=2.49e+6]Epoch 592/3000:  20%|█▉        | 591/3000 [04:17<17:40,  2.27it/s, v_num=1, train_loss_step=2.45e+6, train_loss_epoch=2.49e+6]Epoch 592/3000:  20%|█▉        | 592/3000 [04:18<18:11,  2.21it/s, v_num=1, train_loss_step=2.45e+6, train_loss_epoch=2.49e+6]Epoch 592/3000:  20%|█▉        | 592/3000 [04:18<18:11,  2.21it/s, v_num=1, train_loss_step=2.48e+6, train_loss_epoch=2.49e+6]Epoch 593/3000:  20%|█▉        | 592/3000 [04:18<18:11,  2.21it/s, v_num=1, train_loss_step=2.48e+6, train_loss_epoch=2.49e+6]Epoch 593/3000:  20%|█▉        | 593/3000 [04:18<17:36,  2.28it/s, v_num=1, train_loss_step=2.48e+6, train_loss_epoch=2.49e+6]Epoch 
593/3000:  20%|█▉        | 593/3000 [04:18<17:36,  2.28it/s, v_num=1, train_loss_step=2.68e+6, train_loss_epoch=2.48e+6]Epoch 594/3000:  20%|█▉        | 593/3000 [04:18<17:36,  2.28it/s, v_num=1, train_loss_step=2.68e+6, train_loss_epoch=2.48e+6]Epoch 594/3000:  20%|█▉        | 594/3000 [04:19<17:49,  2.25it/s, v_num=1, train_loss_step=2.68e+6, train_loss_epoch=2.48e+6]Epoch 594/3000:  20%|█▉        | 594/3000 [04:19<17:49,  2.25it/s, v_num=1, train_loss_step=2.49e+6, train_loss_epoch=2.48e+6]Epoch 595/3000:  20%|█▉        | 594/3000 [04:19<17:49,  2.25it/s, v_num=1, train_loss_step=2.49e+6, train_loss_epoch=2.48e+6]Epoch 595/3000:  20%|█▉        | 595/3000 [04:19<17:33,  2.28it/s, v_num=1, train_loss_step=2.49e+6, train_loss_epoch=2.48e+6]Epoch 595/3000:  20%|█▉        | 595/3000 [04:19<17:33,  2.28it/s, v_num=1, train_loss_step=2.36e+6, train_loss_epoch=2.48e+6]Epoch 596/3000:  20%|█▉        | 595/3000 [04:19<17:33,  2.28it/s, v_num=1, train_loss_step=2.36e+6, train_loss_epoch=2.48e+6]Epoch 596/3000:  20%|█▉        | 596/3000 [04:20<17:56,  2.23it/s, v_num=1, train_loss_step=2.36e+6, train_loss_epoch=2.48e+6]Epoch 596/3000:  20%|█▉        | 596/3000 [04:20<17:56,  2.23it/s, v_num=1, train_loss_step=2.58e+6, train_loss_epoch=2.48e+6]Epoch 597/3000:  20%|█▉        | 596/3000 [04:20<17:56,  2.23it/s, v_num=1, train_loss_step=2.58e+6, train_loss_epoch=2.48e+6]Epoch 597/3000:  20%|█▉        | 597/3000 [04:20<18:01,  2.22it/s, v_num=1, train_loss_step=2.58e+6, train_loss_epoch=2.48e+6]Epoch 597/3000:  20%|█▉        | 597/3000 [04:20<18:01,  2.22it/s, v_num=1, train_loss_step=2.36e+6, train_loss_epoch=2.47e+6]Epoch 598/3000:  20%|█▉        | 597/3000 [04:20<18:01,  2.22it/s, v_num=1, train_loss_step=2.36e+6, train_loss_epoch=2.47e+6]Epoch 598/3000:  20%|█▉        | 598/3000 [04:20<16:56,  2.36it/s, v_num=1, train_loss_step=2.36e+6, train_loss_epoch=2.47e+6]Epoch 598/3000:  20%|█▉        | 598/3000 [04:20<16:56,  2.36it/s, v_num=1, train_loss_step=2.5e+6, 
train_loss_epoch=2.47e+6] Epoch 599/3000:  20%|█▉        | 598/3000 [04:20<16:56,  2.36it/s, v_num=1, train_loss_step=2.5e+6, train_loss_epoch=2.47e+6]Epoch 599/3000:  20%|█▉        | 599/3000 [04:21<16:56,  2.36it/s, v_num=1, train_loss_step=2.5e+6, train_loss_epoch=2.47e+6]Epoch 599/3000:  20%|█▉        | 599/3000 [04:21<16:56,  2.36it/s, v_num=1, train_loss_step=2.38e+6, train_loss_epoch=2.47e+6]Epoch 600/3000:  20%|█▉        | 599/3000 [04:21<16:56,  2.36it/s, v_num=1, train_loss_step=2.38e+6, train_loss_epoch=2.47e+6]Epoch 600/3000:  20%|██        | 600/3000 [04:21<16:24,  2.44it/s, v_num=1, train_loss_step=2.38e+6, train_loss_epoch=2.47e+6]Epoch 600/3000:  20%|██        | 600/3000 [04:21<16:24,  2.44it/s, v_num=1, train_loss_step=2.37e+6, train_loss_epoch=2.46e+6]Epoch 601/3000:  20%|██        | 600/3000 [04:21<16:24,  2.44it/s, v_num=1, train_loss_step=2.37e+6, train_loss_epoch=2.46e+6]Epoch 601/3000:  20%|██        | 601/3000 [04:22<16:31,  2.42it/s, v_num=1, train_loss_step=2.37e+6, train_loss_epoch=2.46e+6]Epoch 601/3000:  20%|██        | 601/3000 [04:22<16:31,  2.42it/s, v_num=1, train_loss_step=2.55e+6, train_loss_epoch=2.46e+6]Epoch 602/3000:  20%|██        | 601/3000 [04:22<16:31,  2.42it/s, v_num=1, train_loss_step=2.55e+6, train_loss_epoch=2.46e+6]Epoch 602/3000:  20%|██        | 602/3000 [04:22<16:55,  2.36it/s, v_num=1, train_loss_step=2.55e+6, train_loss_epoch=2.46e+6]Epoch 602/3000:  20%|██        | 602/3000 [04:22<16:55,  2.36it/s, v_num=1, train_loss_step=2.34e+6, train_loss_epoch=2.46e+6]Epoch 603/3000:  20%|██        | 602/3000 [04:22<16:55,  2.36it/s, v_num=1, train_loss_step=2.34e+6, train_loss_epoch=2.46e+6]Epoch 603/3000:  20%|██        | 603/3000 [04:23<17:39,  2.26it/s, v_num=1, train_loss_step=2.34e+6, train_loss_epoch=2.46e+6]Epoch 603/3000:  20%|██        | 603/3000 [04:23<17:39,  2.26it/s, v_num=1, train_loss_step=2.47e+6, train_loss_epoch=2.45e+6]Epoch 604/3000:  20%|██        | 603/3000 [04:23<17:39,  2.26it/s, v_num=1, 
train_loss_step=2.47e+6, train_loss_epoch=2.45e+6]Epoch 604/3000:  20%|██        | 604/3000 [04:23<17:54,  2.23it/s, v_num=1, train_loss_step=2.47e+6, train_loss_epoch=2.45e+6]Epoch 604/3000:  20%|██        | 604/3000 [04:23<17:54,  2.23it/s, v_num=1, train_loss_step=2.41e+6, train_loss_epoch=2.45e+6]Epoch 605/3000:  20%|██        | 604/3000 [04:23<17:54,  2.23it/s, v_num=1, train_loss_step=2.41e+6, train_loss_epoch=2.45e+6]Epoch 605/3000:  20%|██        | 605/3000 [04:23<17:34,  2.27it/s, v_num=1, train_loss_step=2.41e+6, train_loss_epoch=2.45e+6]Epoch 605/3000:  20%|██        | 605/3000 [04:23<17:34,  2.27it/s, v_num=1, train_loss_step=2.52e+6, train_loss_epoch=2.45e+6]Epoch 606/3000:  20%|██        | 605/3000 [04:23<17:34,  2.27it/s, v_num=1, train_loss_step=2.52e+6, train_loss_epoch=2.45e+6]Epoch 606/3000:  20%|██        | 606/3000 [04:24<17:29,  2.28it/s, v_num=1, train_loss_step=2.52e+6, train_loss_epoch=2.45e+6]Epoch 606/3000:  20%|██        | 606/3000 [04:24<17:29,  2.28it/s, v_num=1, train_loss_step=2.47e+6, train_loss_epoch=2.45e+6]Epoch 607/3000:  20%|██        | 606/3000 [04:24<17:29,  2.28it/s, v_num=1, train_loss_step=2.47e+6, train_loss_epoch=2.45e+6]Epoch 607/3000:  20%|██        | 607/3000 [04:24<17:08,  2.33it/s, v_num=1, train_loss_step=2.47e+6, train_loss_epoch=2.45e+6]Epoch 607/3000:  20%|██        | 607/3000 [04:24<17:08,  2.33it/s, v_num=1, train_loss_step=2.51e+6, train_loss_epoch=2.44e+6]Epoch 608/3000:  20%|██        | 607/3000 [04:24<17:08,  2.33it/s, v_num=1, train_loss_step=2.51e+6, train_loss_epoch=2.44e+6]Epoch 608/3000:  20%|██        | 608/3000 [04:25<17:25,  2.29it/s, v_num=1, train_loss_step=2.51e+6, train_loss_epoch=2.44e+6]Epoch 608/3000:  20%|██        | 608/3000 [04:25<17:25,  2.29it/s, v_num=1, train_loss_step=2.63e+6, train_loss_epoch=2.44e+6]Epoch 609/3000:  20%|██        | 608/3000 [04:25<17:25,  2.29it/s, v_num=1, train_loss_step=2.63e+6, train_loss_epoch=2.44e+6]Epoch 609/3000:  20%|██        | 609/3000 [04:25<17:49,  
2.24it/s, v_num=1, train_loss_step=2.63e+6, train_loss_epoch=2.44e+6]Epoch 609/3000:  20%|██        | 609/3000 [04:25<17:49,  2.24it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.44e+6]Epoch 610/3000:  20%|██        | 609/3000 [04:25<17:49,  2.24it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.44e+6]Epoch 610/3000:  20%|██        | 610/3000 [04:26<18:42,  2.13it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.44e+6]Epoch 610/3000:  20%|██        | 610/3000 [04:26<18:42,  2.13it/s, v_num=1, train_loss_step=2.64e+6, train_loss_epoch=2.44e+6]Epoch 611/3000:  20%|██        | 610/3000 [04:26<18:42,  2.13it/s, v_num=1, train_loss_step=2.64e+6, train_loss_epoch=2.44e+6]Epoch 611/3000:  20%|██        | 611/3000 [04:26<17:36,  2.26it/s, v_num=1, train_loss_step=2.64e+6, train_loss_epoch=2.44e+6]Epoch 611/3000:  20%|██        | 611/3000 [04:26<17:36,  2.26it/s, v_num=1, train_loss_step=2.45e+6, train_loss_epoch=2.43e+6]Epoch 612/3000:  20%|██        | 611/3000 [04:26<17:36,  2.26it/s, v_num=1, train_loss_step=2.45e+6, train_loss_epoch=2.43e+6]Epoch 612/3000:  20%|██        | 612/3000 [04:26<17:29,  2.28it/s, v_num=1, train_loss_step=2.45e+6, train_loss_epoch=2.43e+6]Epoch 612/3000:  20%|██        | 612/3000 [04:26<17:29,  2.28it/s, v_num=1, train_loss_step=2.42e+6, train_loss_epoch=2.43e+6]Epoch 613/3000:  20%|██        | 612/3000 [04:27<17:29,  2.28it/s, v_num=1, train_loss_step=2.42e+6, train_loss_epoch=2.43e+6]Epoch 613/3000:  20%|██        | 613/3000 [04:27<17:32,  2.27it/s, v_num=1, train_loss_step=2.42e+6, train_loss_epoch=2.43e+6]Epoch 613/3000:  20%|██        | 613/3000 [04:27<17:32,  2.27it/s, v_num=1, train_loss_step=2.45e+6, train_loss_epoch=2.43e+6]Epoch 614/3000:  20%|██        | 613/3000 [04:27<17:32,  2.27it/s, v_num=1, train_loss_step=2.45e+6, train_loss_epoch=2.43e+6]Epoch 614/3000:  20%|██        | 614/3000 [04:27<16:51,  2.36it/s, v_num=1, train_loss_step=2.45e+6, train_loss_epoch=2.43e+6]Epoch 614/3000:  20%|██        | 
614/3000 [04:27<16:51,  2.36it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.42e+6]Epoch 615/3000:  20%|██        | 614/3000 [04:27<16:51,  2.36it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.42e+6]Epoch 615/3000:  20%|██        | 615/3000 [04:28<17:09,  2.32it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.42e+6]Epoch 615/3000:  20%|██        | 615/3000 [04:28<17:09,  2.32it/s, v_num=1, train_loss_step=2.42e+6, train_loss_epoch=2.42e+6]Epoch 616/3000:  20%|██        | 615/3000 [04:28<17:09,  2.32it/s, v_num=1, train_loss_step=2.42e+6, train_loss_epoch=2.42e+6]Epoch 616/3000:  21%|██        | 616/3000 [04:28<17:59,  2.21it/s, v_num=1, train_loss_step=2.42e+6, train_loss_epoch=2.42e+6]Epoch 616/3000:  21%|██        | 616/3000 [04:28<17:59,  2.21it/s, v_num=1, train_loss_step=2.42e+6, train_loss_epoch=2.42e+6]Epoch 617/3000:  21%|██        | 616/3000 [04:28<17:59,  2.21it/s, v_num=1, train_loss_step=2.42e+6, train_loss_epoch=2.42e+6]Epoch 617/3000:  21%|██        | 617/3000 [04:29<17:59,  2.21it/s, v_num=1, train_loss_step=2.42e+6, train_loss_epoch=2.42e+6]Epoch 617/3000:  21%|██        | 617/3000 [04:29<17:59,  2.21it/s, v_num=1, train_loss_step=2.39e+6, train_loss_epoch=2.42e+6]Epoch 618/3000:  21%|██        | 617/3000 [04:29<17:59,  2.21it/s, v_num=1, train_loss_step=2.39e+6, train_loss_epoch=2.42e+6]Epoch 618/3000:  21%|██        | 618/3000 [04:29<17:18,  2.29it/s, v_num=1, train_loss_step=2.39e+6, train_loss_epoch=2.42e+6]Epoch 618/3000:  21%|██        | 618/3000 [04:29<17:18,  2.29it/s, v_num=1, train_loss_step=2.46e+6, train_loss_epoch=2.41e+6]Epoch 619/3000:  21%|██        | 618/3000 [04:29<17:18,  2.29it/s, v_num=1, train_loss_step=2.46e+6, train_loss_epoch=2.41e+6]Epoch 619/3000:  21%|██        | 619/3000 [04:30<17:23,  2.28it/s, v_num=1, train_loss_step=2.46e+6, train_loss_epoch=2.41e+6]Epoch 619/3000:  21%|██        | 619/3000 [04:30<17:23,  2.28it/s, v_num=1, train_loss_step=2.5e+6, train_loss_epoch=2.41e+6] Epoch 620/3000:  
21%|██        | 619/3000 [04:30<17:23,  2.28it/s, v_num=1, train_loss_step=2.5e+6, train_loss_epoch=2.41e+6]Epoch 620/3000:  21%|██        | 620/3000 [04:30<16:32,  2.40it/s, v_num=1, train_loss_step=2.5e+6, train_loss_epoch=2.41e+6]Epoch 620/3000:  21%|██        | 620/3000 [04:30<16:32,  2.40it/s, v_num=1, train_loss_step=2.43e+6, train_loss_epoch=2.41e+6]Epoch 621/3000:  21%|██        | 620/3000 [04:30<16:32,  2.40it/s, v_num=1, train_loss_step=2.43e+6, train_loss_epoch=2.41e+6]Epoch 621/3000:  21%|██        | 621/3000 [04:30<16:45,  2.37it/s, v_num=1, train_loss_step=2.43e+6, train_loss_epoch=2.41e+6]Epoch 621/3000:  21%|██        | 621/3000 [04:30<16:45,  2.37it/s, v_num=1, train_loss_step=2.49e+6, train_loss_epoch=2.41e+6]Epoch 622/3000:  21%|██        | 621/3000 [04:30<16:45,  2.37it/s, v_num=1, train_loss_step=2.49e+6, train_loss_epoch=2.41e+6]Epoch 622/3000:  21%|██        | 622/3000 [04:31<16:55,  2.34it/s, v_num=1, train_loss_step=2.49e+6, train_loss_epoch=2.41e+6]Epoch 622/3000:  21%|██        | 622/3000 [04:31<16:55,  2.34it/s, v_num=1, train_loss_step=2.55e+6, train_loss_epoch=2.4e+6] Epoch 623/3000:  21%|██        | 622/3000 [04:31<16:55,  2.34it/s, v_num=1, train_loss_step=2.55e+6, train_loss_epoch=2.4e+6]Epoch 623/3000:  21%|██        | 623/3000 [04:31<17:59,  2.20it/s, v_num=1, train_loss_step=2.55e+6, train_loss_epoch=2.4e+6]Epoch 623/3000:  21%|██        | 623/3000 [04:31<17:59,  2.20it/s, v_num=1, train_loss_step=2.43e+6, train_loss_epoch=2.4e+6]Epoch 624/3000:  21%|██        | 623/3000 [04:31<17:59,  2.20it/s, v_num=1, train_loss_step=2.43e+6, train_loss_epoch=2.4e+6]Epoch 624/3000:  21%|██        | 624/3000 [04:32<17:39,  2.24it/s, v_num=1, train_loss_step=2.43e+6, train_loss_epoch=2.4e+6]Epoch 624/3000:  21%|██        | 624/3000 [04:32<17:39,  2.24it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.4e+6]Epoch 625/3000:  21%|██        | 624/3000 [04:32<17:39,  2.24it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.4e+6]Epoch 
625/3000:  21%|██        | 625/3000 [04:32<17:26,  2.27it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.4e+6]Epoch 625/3000:  21%|██        | 625/3000 [04:32<17:26,  2.27it/s, v_num=1, train_loss_step=2.43e+6, train_loss_epoch=2.39e+6]Epoch 626/3000:  21%|██        | 625/3000 [04:32<17:26,  2.27it/s, v_num=1, train_loss_step=2.43e+6, train_loss_epoch=2.39e+6]Epoch 626/3000:  21%|██        | 626/3000 [04:33<17:07,  2.31it/s, v_num=1, train_loss_step=2.43e+6, train_loss_epoch=2.39e+6]Epoch 626/3000:  21%|██        | 626/3000 [04:33<17:07,  2.31it/s, v_num=1, train_loss_step=2.46e+6, train_loss_epoch=2.39e+6]Epoch 627/3000:  21%|██        | 626/3000 [04:33<17:07,  2.31it/s, v_num=1, train_loss_step=2.46e+6, train_loss_epoch=2.39e+6]Epoch 627/3000:  21%|██        | 627/3000 [04:33<16:39,  2.37it/s, v_num=1, train_loss_step=2.46e+6, train_loss_epoch=2.39e+6]Epoch 627/3000:  21%|██        | 627/3000 [04:33<16:39,  2.37it/s, v_num=1, train_loss_step=2.46e+6, train_loss_epoch=2.39e+6]Epoch 628/3000:  21%|██        | 627/3000 [04:33<16:39,  2.37it/s, v_num=1, train_loss_step=2.46e+6, train_loss_epoch=2.39e+6]Epoch 628/3000:  21%|██        | 628/3000 [04:33<17:05,  2.31it/s, v_num=1, train_loss_step=2.46e+6, train_loss_epoch=2.39e+6]Epoch 628/3000:  21%|██        | 628/3000 [04:33<17:05,  2.31it/s, v_num=1, train_loss_step=2.26e+6, train_loss_epoch=2.39e+6]Epoch 629/3000:  21%|██        | 628/3000 [04:33<17:05,  2.31it/s, v_num=1, train_loss_step=2.26e+6, train_loss_epoch=2.39e+6]Epoch 629/3000:  21%|██        | 629/3000 [04:34<17:39,  2.24it/s, v_num=1, train_loss_step=2.26e+6, train_loss_epoch=2.39e+6]Epoch 629/3000:  21%|██        | 629/3000 [04:34<17:39,  2.24it/s, v_num=1, train_loss_step=2.26e+6, train_loss_epoch=2.38e+6]Epoch 630/3000:  21%|██        | 629/3000 [04:34<17:39,  2.24it/s, v_num=1, train_loss_step=2.26e+6, train_loss_epoch=2.38e+6]Epoch 630/3000:  21%|██        | 630/3000 [04:34<17:12,  2.30it/s, v_num=1, train_loss_step=2.26e+6, 
train_loss_epoch=2.38e+6]Epoch 630/3000:  21%|██        | 630/3000 [04:34<17:12,  2.30it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.38e+6]Epoch 631/3000:  21%|██        | 630/3000 [04:34<17:12,  2.30it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.38e+6]Epoch 631/3000:  21%|██        | 631/3000 [04:35<17:22,  2.27it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.38e+6]Epoch 631/3000:  21%|██        | 631/3000 [04:35<17:22,  2.27it/s, v_num=1, train_loss_step=2.43e+6, train_loss_epoch=2.38e+6]Epoch 632/3000:  21%|██        | 631/3000 [04:35<17:22,  2.27it/s, v_num=1, train_loss_step=2.43e+6, train_loss_epoch=2.38e+6]Epoch 632/3000:  21%|██        | 632/3000 [04:35<16:53,  2.34it/s, v_num=1, train_loss_step=2.43e+6, train_loss_epoch=2.38e+6]Epoch 632/3000:  21%|██        | 632/3000 [04:35<16:53,  2.34it/s, v_num=1, train_loss_step=2.38e+6, train_loss_epoch=2.38e+6]Epoch 633/3000:  21%|██        | 632/3000 [04:35<16:53,  2.34it/s, v_num=1, train_loss_step=2.38e+6, train_loss_epoch=2.38e+6]Epoch 633/3000:  21%|██        | 633/3000 [04:36<17:22,  2.27it/s, v_num=1, train_loss_step=2.38e+6, train_loss_epoch=2.38e+6]Epoch 633/3000:  21%|██        | 633/3000 [04:36<17:22,  2.27it/s, v_num=1, train_loss_step=2.5e+6, train_loss_epoch=2.37e+6] Epoch 634/3000:  21%|██        | 633/3000 [04:36<17:22,  2.27it/s, v_num=1, train_loss_step=2.5e+6, train_loss_epoch=2.37e+6]Epoch 634/3000:  21%|██        | 634/3000 [04:36<18:22,  2.15it/s, v_num=1, train_loss_step=2.5e+6, train_loss_epoch=2.37e+6]Epoch 634/3000:  21%|██        | 634/3000 [04:36<18:22,  2.15it/s, v_num=1, train_loss_step=2.37e+6, train_loss_epoch=2.37e+6]Epoch 635/3000:  21%|██        | 634/3000 [04:36<18:22,  2.15it/s, v_num=1, train_loss_step=2.37e+6, train_loss_epoch=2.37e+6]Epoch 635/3000:  21%|██        | 635/3000 [04:37<18:12,  2.16it/s, v_num=1, train_loss_step=2.37e+6, train_loss_epoch=2.37e+6]Epoch 635/3000:  21%|██        | 635/3000 [04:37<18:12,  2.16it/s, v_num=1, 
train_loss_step=2.25e+6, train_loss_epoch=2.37e+6]Epoch 636/3000:  21%|██        | 635/3000 [04:37<18:12,  2.16it/s, v_num=1, train_loss_step=2.25e+6, train_loss_epoch=2.37e+6]Epoch 636/3000:  21%|██        | 636/3000 [04:37<17:34,  2.24it/s, v_num=1, train_loss_step=2.25e+6, train_loss_epoch=2.37e+6]Epoch 636/3000:  21%|██        | 636/3000 [04:37<17:34,  2.24it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.37e+6]Epoch 637/3000:  21%|██        | 636/3000 [04:37<17:34,  2.24it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.37e+6]Epoch 637/3000:  21%|██        | 637/3000 [04:38<18:00,  2.19it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.37e+6]Epoch 637/3000:  21%|██        | 637/3000 [04:38<18:00,  2.19it/s, v_num=1, train_loss_step=2.43e+6, train_loss_epoch=2.36e+6]Epoch 638/3000:  21%|██        | 637/3000 [04:38<18:00,  2.19it/s, v_num=1, train_loss_step=2.43e+6, train_loss_epoch=2.36e+6]Epoch 638/3000:  21%|██▏       | 638/3000 [04:38<16:57,  2.32it/s, v_num=1, train_loss_step=2.43e+6, train_loss_epoch=2.36e+6]Epoch 638/3000:  21%|██▏       | 638/3000 [04:38<16:57,  2.32it/s, v_num=1, train_loss_step=2.37e+6, train_loss_epoch=2.36e+6]Epoch 639/3000:  21%|██▏       | 638/3000 [04:38<16:57,  2.32it/s, v_num=1, train_loss_step=2.37e+6, train_loss_epoch=2.36e+6]Epoch 639/3000:  21%|██▏       | 639/3000 [04:38<17:23,  2.26it/s, v_num=1, train_loss_step=2.37e+6, train_loss_epoch=2.36e+6]Epoch 639/3000:  21%|██▏       | 639/3000 [04:38<17:23,  2.26it/s, v_num=1, train_loss_step=2.38e+6, train_loss_epoch=2.36e+6]Epoch 640/3000:  21%|██▏       | 639/3000 [04:38<17:23,  2.26it/s, v_num=1, train_loss_step=2.38e+6, train_loss_epoch=2.36e+6]Epoch 640/3000:  21%|██▏       | 640/3000 [04:39<17:30,  2.25it/s, v_num=1, train_loss_step=2.38e+6, train_loss_epoch=2.36e+6]Epoch 640/3000:  21%|██▏       | 640/3000 [04:39<17:30,  2.25it/s, v_num=1, train_loss_step=2.41e+6, train_loss_epoch=2.36e+6]Epoch 641/3000:  21%|██▏       | 640/3000 [04:39<17:30,  
2.25it/s, v_num=1, train_loss_step=2.41e+6, train_loss_epoch=2.36e+6]Epoch 641/3000:  21%|██▏       | 641/3000 [04:39<17:12,  2.28it/s, v_num=1, train_loss_step=2.41e+6, train_loss_epoch=2.36e+6]Epoch 641/3000:  21%|██▏       | 641/3000 [04:39<17:12,  2.28it/s, v_num=1, train_loss_step=2.42e+6, train_loss_epoch=2.35e+6]Epoch 642/3000:  21%|██▏       | 641/3000 [04:39<17:12,  2.28it/s, v_num=1, train_loss_step=2.42e+6, train_loss_epoch=2.35e+6]Epoch 642/3000:  21%|██▏       | 642/3000 [04:40<16:51,  2.33it/s, v_num=1, train_loss_step=2.42e+6, train_loss_epoch=2.35e+6]Epoch 642/3000:  21%|██▏       | 642/3000 [04:40<16:51,  2.33it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.35e+6]Epoch 643/3000:  21%|██▏       | 642/3000 [04:40<16:51,  2.33it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.35e+6]Epoch 643/3000:  21%|██▏       | 643/3000 [04:40<17:15,  2.28it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.35e+6]Epoch 643/3000:  21%|██▏       | 643/3000 [04:40<17:15,  2.28it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.35e+6]Epoch 644/3000:  21%|██▏       | 643/3000 [04:40<17:15,  2.28it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.35e+6]Epoch 644/3000:  21%|██▏       | 644/3000 [04:41<17:42,  2.22it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.35e+6]Epoch 644/3000:  21%|██▏       | 644/3000 [04:41<17:42,  2.22it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.35e+6]Epoch 645/3000:  21%|██▏       | 644/3000 [04:41<17:42,  2.22it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.35e+6]Epoch 645/3000:  22%|██▏       | 645/3000 [04:41<17:40,  2.22it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.35e+6]Epoch 645/3000:  22%|██▏       | 645/3000 [04:41<17:40,  2.22it/s, v_num=1, train_loss_step=2.3e+6, train_loss_epoch=2.34e+6] Epoch 646/3000:  22%|██▏       | 645/3000 [04:41<17:40,  2.22it/s, v_num=1, train_loss_step=2.3e+6, train_loss_epoch=2.34e+6]Epoch 646/3000:  22%|██▏       | 646/3000 
[04:41<17:05,  2.29it/s, v_num=1, train_loss_step=2.3e+6, train_loss_epoch=2.34e+6]Epoch 646/3000:  22%|██▏       | 646/3000 [04:41<17:05,  2.29it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.34e+6]Epoch 647/3000:  22%|██▏       | 646/3000 [04:41<17:05,  2.29it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.34e+6]Epoch 647/3000:  22%|██▏       | 647/3000 [04:42<16:38,  2.36it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.34e+6]Epoch 647/3000:  22%|██▏       | 647/3000 [04:42<16:38,  2.36it/s, v_num=1, train_loss_step=2.26e+6, train_loss_epoch=2.34e+6]Epoch 648/3000:  22%|██▏       | 647/3000 [04:42<16:38,  2.36it/s, v_num=1, train_loss_step=2.26e+6, train_loss_epoch=2.34e+6]Epoch 648/3000:  22%|██▏       | 648/3000 [04:42<16:29,  2.38it/s, v_num=1, train_loss_step=2.26e+6, train_loss_epoch=2.34e+6]Epoch 648/3000:  22%|██▏       | 648/3000 [04:42<16:29,  2.38it/s, v_num=1, train_loss_step=2.34e+6, train_loss_epoch=2.34e+6]Epoch 649/3000:  22%|██▏       | 648/3000 [04:42<16:29,  2.38it/s, v_num=1, train_loss_step=2.34e+6, train_loss_epoch=2.34e+6]Epoch 649/3000:  22%|██▏       | 649/3000 [04:43<17:13,  2.27it/s, v_num=1, train_loss_step=2.34e+6, train_loss_epoch=2.34e+6]Epoch 649/3000:  22%|██▏       | 649/3000 [04:43<17:13,  2.27it/s, v_num=1, train_loss_step=2.23e+6, train_loss_epoch=2.33e+6]Epoch 650/3000:  22%|██▏       | 649/3000 [04:43<17:13,  2.27it/s, v_num=1, train_loss_step=2.23e+6, train_loss_epoch=2.33e+6]Epoch 650/3000:  22%|██▏       | 650/3000 [04:43<17:12,  2.28it/s, v_num=1, train_loss_step=2.23e+6, train_loss_epoch=2.33e+6]Epoch 650/3000:  22%|██▏       | 650/3000 [04:43<17:12,  2.28it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.33e+6]Epoch 651/3000:  22%|██▏       | 650/3000 [04:43<17:12,  2.28it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.33e+6]Epoch 651/3000:  22%|██▏       | 651/3000 [04:44<16:32,  2.37it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.33e+6]Epoch 651/3000:  22%|██▏   
    | 651/3000 [04:44<16:32,  2.37it/s, v_num=1, train_loss_step=2.39e+6, train_loss_epoch=2.33e+6]Epoch 652/3000:  22%|██▏       | 651/3000 [04:44<16:32,  2.37it/s, v_num=1, train_loss_step=2.39e+6, train_loss_epoch=2.33e+6]Epoch 652/3000:  22%|██▏       | 652/3000 [04:44<16:46,  2.33it/s, v_num=1, train_loss_step=2.39e+6, train_loss_epoch=2.33e+6]Epoch 652/3000:  22%|██▏       | 652/3000 [04:44<16:46,  2.33it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.33e+6]Epoch 653/3000:  22%|██▏       | 652/3000 [04:44<16:46,  2.33it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.33e+6]Epoch 653/3000:  22%|██▏       | 653/3000 [04:44<15:44,  2.49it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.33e+6]Epoch 653/3000:  22%|██▏       | 653/3000 [04:44<15:44,  2.49it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.33e+6]Epoch 654/3000:  22%|██▏       | 653/3000 [04:44<15:44,  2.49it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.33e+6]Epoch 654/3000:  22%|██▏       | 654/3000 [04:45<15:18,  2.55it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.33e+6]Epoch 654/3000:  22%|██▏       | 654/3000 [04:45<15:18,  2.55it/s, v_num=1, train_loss_step=2.4e+6, train_loss_epoch=2.32e+6] Epoch 655/3000:  22%|██▏       | 654/3000 [04:45<15:18,  2.55it/s, v_num=1, train_loss_step=2.4e+6, train_loss_epoch=2.32e+6]Epoch 655/3000:  22%|██▏       | 655/3000 [04:45<16:05,  2.43it/s, v_num=1, train_loss_step=2.4e+6, train_loss_epoch=2.32e+6]Epoch 655/3000:  22%|██▏       | 655/3000 [04:45<16:05,  2.43it/s, v_num=1, train_loss_step=2.4e+6, train_loss_epoch=2.32e+6]Epoch 656/3000:  22%|██▏       | 655/3000 [04:45<16:05,  2.43it/s, v_num=1, train_loss_step=2.4e+6, train_loss_epoch=2.32e+6]Epoch 656/3000:  22%|██▏       | 656/3000 [04:46<16:50,  2.32it/s, v_num=1, train_loss_step=2.4e+6, train_loss_epoch=2.32e+6]Epoch 656/3000:  22%|██▏       | 656/3000 [04:46<16:50,  2.32it/s, v_num=1, train_loss_step=2.42e+6, train_loss_epoch=2.32e+6]Epoch 657/3000: 
 22%|██▏       | 656/3000 [04:46<16:50,  2.32it/s, v_num=1, train_loss_step=2.42e+6, train_loss_epoch=2.32e+6]Epoch 657/3000:  22%|██▏       | 657/3000 [04:46<17:33,  2.22it/s, v_num=1, train_loss_step=2.42e+6, train_loss_epoch=2.32e+6]Epoch 657/3000:  22%|██▏       | 657/3000 [04:46<17:33,  2.22it/s, v_num=1, train_loss_step=2.4e+6, train_loss_epoch=2.32e+6] Epoch 658/3000:  22%|██▏       | 657/3000 [04:46<17:33,  2.22it/s, v_num=1, train_loss_step=2.4e+6, train_loss_epoch=2.32e+6]Epoch 658/3000:  22%|██▏       | 658/3000 [04:47<18:17,  2.13it/s, v_num=1, train_loss_step=2.4e+6, train_loss_epoch=2.32e+6]Epoch 658/3000:  22%|██▏       | 658/3000 [04:47<18:17,  2.13it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.31e+6]Epoch 659/3000:  22%|██▏       | 658/3000 [04:47<18:17,  2.13it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.31e+6]Epoch 659/3000:  22%|██▏       | 659/3000 [04:47<17:21,  2.25it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.31e+6]Epoch 659/3000:  22%|██▏       | 659/3000 [04:47<17:21,  2.25it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.31e+6]Epoch 660/3000:  22%|██▏       | 659/3000 [04:47<17:21,  2.25it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.31e+6]Epoch 660/3000:  22%|██▏       | 660/3000 [04:47<16:41,  2.34it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.31e+6]Epoch 660/3000:  22%|██▏       | 660/3000 [04:47<16:41,  2.34it/s, v_num=1, train_loss_step=2.36e+6, train_loss_epoch=2.31e+6]Epoch 661/3000:  22%|██▏       | 660/3000 [04:47<16:41,  2.34it/s, v_num=1, train_loss_step=2.36e+6, train_loss_epoch=2.31e+6]Epoch 661/3000:  22%|██▏       | 661/3000 [04:48<17:05,  2.28it/s, v_num=1, train_loss_step=2.36e+6, train_loss_epoch=2.31e+6]Epoch 661/3000:  22%|██▏       | 661/3000 [04:48<17:05,  2.28it/s, v_num=1, train_loss_step=2.21e+6, train_loss_epoch=2.31e+6]Epoch 662/3000:  22%|██▏       | 661/3000 [04:48<17:05,  2.28it/s, v_num=1, train_loss_step=2.21e+6, 
train_loss_epoch=2.31e+6]Epoch 662/3000:  22%|██▏       | 662/3000 [04:48<16:52,  2.31it/s, v_num=1, train_loss_step=2.21e+6, train_loss_epoch=2.31e+6]Epoch 662/3000:  22%|██▏       | 662/3000 [04:48<16:52,  2.31it/s, v_num=1, train_loss_step=2.29e+6, train_loss_epoch=2.3e+6] Epoch 663/3000:  22%|██▏       | 662/3000 [04:48<16:52,  2.31it/s, v_num=1, train_loss_step=2.29e+6, train_loss_epoch=2.3e+6]Epoch 663/3000:  22%|██▏       | 663/3000 [04:49<17:00,  2.29it/s, v_num=1, train_loss_step=2.29e+6, train_loss_epoch=2.3e+6]Epoch 663/3000:  22%|██▏       | 663/3000 [04:49<17:00,  2.29it/s, v_num=1, train_loss_step=2.28e+6, train_loss_epoch=2.3e+6]Epoch 664/3000:  22%|██▏       | 663/3000 [04:49<17:00,  2.29it/s, v_num=1, train_loss_step=2.28e+6, train_loss_epoch=2.3e+6]Epoch 664/3000:  22%|██▏       | 664/3000 [04:49<16:31,  2.36it/s, v_num=1, train_loss_step=2.28e+6, train_loss_epoch=2.3e+6]Epoch 664/3000:  22%|██▏       | 664/3000 [04:49<16:31,  2.36it/s, v_num=1, train_loss_step=2.4e+6, train_loss_epoch=2.3e+6] Epoch 665/3000:  22%|██▏       | 664/3000 [04:49<16:31,  2.36it/s, v_num=1, train_loss_step=2.4e+6, train_loss_epoch=2.3e+6]Epoch 665/3000:  22%|██▏       | 665/3000 [04:50<16:27,  2.37it/s, v_num=1, train_loss_step=2.4e+6, train_loss_epoch=2.3e+6]Epoch 665/3000:  22%|██▏       | 665/3000 [04:50<16:27,  2.37it/s, v_num=1, train_loss_step=2.15e+6, train_loss_epoch=2.3e+6]Epoch 666/3000:  22%|██▏       | 665/3000 [04:50<16:27,  2.37it/s, v_num=1, train_loss_step=2.15e+6, train_loss_epoch=2.3e+6]Epoch 666/3000:  22%|██▏       | 666/3000 [04:50<15:58,  2.44it/s, v_num=1, train_loss_step=2.15e+6, train_loss_epoch=2.3e+6]Epoch 666/3000:  22%|██▏       | 666/3000 [04:50<15:58,  2.44it/s, v_num=1, train_loss_step=2.38e+6, train_loss_epoch=2.3e+6]Epoch 667/3000:  22%|██▏       | 666/3000 [04:50<15:58,  2.44it/s, v_num=1, train_loss_step=2.38e+6, train_loss_epoch=2.3e+6]Epoch 667/3000:  22%|██▏       | 667/3000 [04:50<16:20,  2.38it/s, v_num=1, 
train_loss_step=2.38e+6, train_loss_epoch=2.3e+6]Epoch 667/3000:  22%|██▏       | 667/3000 [04:50<16:20,  2.38it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.29e+6]Epoch 668/3000:  22%|██▏       | 667/3000 [04:50<16:20,  2.38it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.29e+6]Epoch 668/3000:  22%|██▏       | 668/3000 [04:51<16:10,  2.40it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.29e+6]Epoch 668/3000:  22%|██▏       | 668/3000 [04:51<16:10,  2.40it/s, v_num=1, train_loss_step=2.11e+6, train_loss_epoch=2.29e+6]Epoch 669/3000:  22%|██▏       | 668/3000 [04:51<16:10,  2.40it/s, v_num=1, train_loss_step=2.11e+6, train_loss_epoch=2.29e+6]Epoch 669/3000:  22%|██▏       | 669/3000 [04:51<16:22,  2.37it/s, v_num=1, train_loss_step=2.11e+6, train_loss_epoch=2.29e+6]Epoch 669/3000:  22%|██▏       | 669/3000 [04:51<16:22,  2.37it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.29e+6]Epoch 670/3000:  22%|██▏       | 669/3000 [04:51<16:22,  2.37it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.29e+6]Epoch 670/3000:  22%|██▏       | 670/3000 [04:52<15:46,  2.46it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.29e+6]Epoch 670/3000:  22%|██▏       | 670/3000 [04:52<15:46,  2.46it/s, v_num=1, train_loss_step=2.46e+6, train_loss_epoch=2.29e+6]Epoch 671/3000:  22%|██▏       | 670/3000 [04:52<15:46,  2.46it/s, v_num=1, train_loss_step=2.46e+6, train_loss_epoch=2.29e+6]Epoch 671/3000:  22%|██▏       | 671/3000 [04:52<14:48,  2.62it/s, v_num=1, train_loss_step=2.46e+6, train_loss_epoch=2.29e+6]Epoch 671/3000:  22%|██▏       | 671/3000 [04:52<14:48,  2.62it/s, v_num=1, train_loss_step=2.45e+6, train_loss_epoch=2.28e+6]Epoch 672/3000:  22%|██▏       | 671/3000 [04:52<14:48,  2.62it/s, v_num=1, train_loss_step=2.45e+6, train_loss_epoch=2.28e+6]Epoch 672/3000:  22%|██▏       | 672/3000 [04:52<15:53,  2.44it/s, v_num=1, train_loss_step=2.45e+6, train_loss_epoch=2.28e+6]Epoch 672/3000:  22%|██▏       | 672/3000 [04:52<15:53,  
2.44it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.28e+6]Epoch 673/3000:  22%|██▏       | 672/3000 [04:52<15:53,  2.44it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.28e+6]Epoch 673/3000:  22%|██▏       | 673/3000 [04:53<16:40,  2.33it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.28e+6]Epoch 673/3000:  22%|██▏       | 673/3000 [04:53<16:40,  2.33it/s, v_num=1, train_loss_step=2.17e+6, train_loss_epoch=2.28e+6]Epoch 674/3000:  22%|██▏       | 673/3000 [04:53<16:40,  2.33it/s, v_num=1, train_loss_step=2.17e+6, train_loss_epoch=2.28e+6]Epoch 674/3000:  22%|██▏       | 674/3000 [04:53<17:06,  2.27it/s, v_num=1, train_loss_step=2.17e+6, train_loss_epoch=2.28e+6]Epoch 674/3000:  22%|██▏       | 674/3000 [04:53<17:06,  2.27it/s, v_num=1, train_loss_step=2.28e+6, train_loss_epoch=2.28e+6]Epoch 675/3000:  22%|██▏       | 674/3000 [04:53<17:06,  2.27it/s, v_num=1, train_loss_step=2.28e+6, train_loss_epoch=2.28e+6]Epoch 675/3000:  22%|██▎       | 675/3000 [04:54<16:42,  2.32it/s, v_num=1, train_loss_step=2.28e+6, train_loss_epoch=2.28e+6]Epoch 675/3000:  22%|██▎       | 675/3000 [04:54<16:42,  2.32it/s, v_num=1, train_loss_step=2.34e+6, train_loss_epoch=2.28e+6]Epoch 676/3000:  22%|██▎       | 675/3000 [04:54<16:42,  2.32it/s, v_num=1, train_loss_step=2.34e+6, train_loss_epoch=2.28e+6]Epoch 676/3000:  23%|██▎       | 676/3000 [04:54<15:52,  2.44it/s, v_num=1, train_loss_step=2.34e+6, train_loss_epoch=2.28e+6]Epoch 676/3000:  23%|██▎       | 676/3000 [04:54<15:52,  2.44it/s, v_num=1, train_loss_step=2.23e+6, train_loss_epoch=2.27e+6]Epoch 677/3000:  23%|██▎       | 676/3000 [04:54<15:52,  2.44it/s, v_num=1, train_loss_step=2.23e+6, train_loss_epoch=2.27e+6]Epoch 677/3000:  23%|██▎       | 677/3000 [04:55<16:11,  2.39it/s, v_num=1, train_loss_step=2.23e+6, train_loss_epoch=2.27e+6]Epoch 677/3000:  23%|██▎       | 677/3000 [04:55<16:11,  2.39it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.27e+6]Epoch 678/3000:  23%|██▎       | 
677/3000 [04:55<16:11,  2.39it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.27e+6]Epoch 678/3000:  23%|██▎       | 678/3000 [04:55<16:12,  2.39it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.27e+6]Epoch 678/3000:  23%|██▎       | 678/3000 [04:55<16:12,  2.39it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.27e+6]Epoch 679/3000:  23%|██▎       | 678/3000 [04:55<16:12,  2.39it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.27e+6]Epoch 679/3000:  23%|██▎       | 679/3000 [04:55<15:37,  2.48it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.27e+6]Epoch 679/3000:  23%|██▎       | 679/3000 [04:55<15:37,  2.48it/s, v_num=1, train_loss_step=2.26e+6, train_loss_epoch=2.27e+6]Epoch 680/3000:  23%|██▎       | 679/3000 [04:55<15:37,  2.48it/s, v_num=1, train_loss_step=2.26e+6, train_loss_epoch=2.27e+6]Epoch 680/3000:  23%|██▎       | 680/3000 [04:56<16:14,  2.38it/s, v_num=1, train_loss_step=2.26e+6, train_loss_epoch=2.27e+6]Epoch 680/3000:  23%|██▎       | 680/3000 [04:56<16:14,  2.38it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.26e+6]Epoch 681/3000:  23%|██▎       | 680/3000 [04:56<16:14,  2.38it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.26e+6]Epoch 681/3000:  23%|██▎       | 681/3000 [04:56<17:20,  2.23it/s, v_num=1, train_loss_step=2.31e+6, train_loss_epoch=2.26e+6]Epoch 681/3000:  23%|██▎       | 681/3000 [04:56<17:20,  2.23it/s, v_num=1, train_loss_step=2.19e+6, train_loss_epoch=2.26e+6]Epoch 682/3000:  23%|██▎       | 681/3000 [04:56<17:20,  2.23it/s, v_num=1, train_loss_step=2.19e+6, train_loss_epoch=2.26e+6]Epoch 682/3000:  23%|██▎       | 682/3000 [04:57<17:46,  2.17it/s, v_num=1, train_loss_step=2.19e+6, train_loss_epoch=2.26e+6]Epoch 682/3000:  23%|██▎       | 682/3000 [04:57<17:46,  2.17it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.26e+6]Epoch 683/3000:  23%|██▎       | 682/3000 [04:57<17:46,  2.17it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.26e+6]Epoch 683/3000:  
23%|██▎       | 683/3000 [04:57<17:40,  2.19it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.26e+6]Epoch 683/3000:  23%|██▎       | 683/3000 [04:57<17:40,  2.19it/s, v_num=1, train_loss_step=2.34e+6, train_loss_epoch=2.26e+6]Epoch 684/3000:  23%|██▎       | 683/3000 [04:57<17:40,  2.19it/s, v_num=1, train_loss_step=2.34e+6, train_loss_epoch=2.26e+6]Epoch 684/3000:  23%|██▎       | 684/3000 [04:58<17:17,  2.23it/s, v_num=1, train_loss_step=2.34e+6, train_loss_epoch=2.26e+6]Epoch 684/3000:  23%|██▎       | 684/3000 [04:58<17:17,  2.23it/s, v_num=1, train_loss_step=2.21e+6, train_loss_epoch=2.26e+6]Epoch 685/3000:  23%|██▎       | 684/3000 [04:58<17:17,  2.23it/s, v_num=1, train_loss_step=2.21e+6, train_loss_epoch=2.26e+6]Epoch 685/3000:  23%|██▎       | 685/3000 [04:58<17:05,  2.26it/s, v_num=1, train_loss_step=2.21e+6, train_loss_epoch=2.26e+6]Epoch 685/3000:  23%|██▎       | 685/3000 [04:58<17:05,  2.26it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.25e+6]Epoch 686/3000:  23%|██▎       | 685/3000 [04:58<17:05,  2.26it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.25e+6]Epoch 686/3000:  23%|██▎       | 686/3000 [04:59<16:32,  2.33it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.25e+6]Epoch 686/3000:  23%|██▎       | 686/3000 [04:59<16:32,  2.33it/s, v_num=1, train_loss_step=2.21e+6, train_loss_epoch=2.25e+6]Epoch 687/3000:  23%|██▎       | 686/3000 [04:59<16:32,  2.33it/s, v_num=1, train_loss_step=2.21e+6, train_loss_epoch=2.25e+6]Epoch 687/3000:  23%|██▎       | 687/3000 [04:59<16:43,  2.31it/s, v_num=1, train_loss_step=2.21e+6, train_loss_epoch=2.25e+6]Epoch 687/3000:  23%|██▎       | 687/3000 [04:59<16:43,  2.31it/s, v_num=1, train_loss_step=2.18e+6, train_loss_epoch=2.25e+6]Epoch 688/3000:  23%|██▎       | 687/3000 [04:59<16:43,  2.31it/s, v_num=1, train_loss_step=2.18e+6, train_loss_epoch=2.25e+6]Epoch 688/3000:  23%|██▎       | 688/3000 [04:59<16:34,  2.32it/s, v_num=1, train_loss_step=2.18e+6, 
train_loss_epoch=2.25e+6]Epoch 688/3000:  23%|██▎       | 688/3000 [04:59<16:34,  2.32it/s, v_num=1, train_loss_step=2.24e+6, train_loss_epoch=2.25e+6]Epoch 689/3000:  23%|██▎       | 688/3000 [04:59<16:34,  2.32it/s, v_num=1, train_loss_step=2.24e+6, train_loss_epoch=2.25e+6]Epoch 689/3000:  23%|██▎       | 689/3000 [05:00<16:42,  2.31it/s, v_num=1, train_loss_step=2.24e+6, train_loss_epoch=2.25e+6]Epoch 689/3000:  23%|██▎       | 689/3000 [05:00<16:42,  2.31it/s, v_num=1, train_loss_step=2.34e+6, train_loss_epoch=2.25e+6]Epoch 690/3000:  23%|██▎       | 689/3000 [05:00<16:42,  2.31it/s, v_num=1, train_loss_step=2.34e+6, train_loss_epoch=2.25e+6]Epoch 690/3000:  23%|██▎       | 690/3000 [05:00<16:58,  2.27it/s, v_num=1, train_loss_step=2.34e+6, train_loss_epoch=2.25e+6]Epoch 690/3000:  23%|██▎       | 690/3000 [05:00<16:58,  2.27it/s, v_num=1, train_loss_step=2.2e+6, train_loss_epoch=2.24e+6] Epoch 691/3000:  23%|██▎       | 690/3000 [05:00<16:58,  2.27it/s, v_num=1, train_loss_step=2.2e+6, train_loss_epoch=2.24e+6]Epoch 691/3000:  23%|██▎       | 691/3000 [05:01<16:44,  2.30it/s, v_num=1, train_loss_step=2.2e+6, train_loss_epoch=2.24e+6]Epoch 691/3000:  23%|██▎       | 691/3000 [05:01<16:44,  2.30it/s, v_num=1, train_loss_step=2.19e+6, train_loss_epoch=2.24e+6]Epoch 692/3000:  23%|██▎       | 691/3000 [05:01<16:44,  2.30it/s, v_num=1, train_loss_step=2.19e+6, train_loss_epoch=2.24e+6]Epoch 692/3000:  23%|██▎       | 692/3000 [05:01<16:48,  2.29it/s, v_num=1, train_loss_step=2.19e+6, train_loss_epoch=2.24e+6]Epoch 692/3000:  23%|██▎       | 692/3000 [05:01<16:48,  2.29it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.24e+6]Epoch 693/3000:  23%|██▎       | 692/3000 [05:01<16:48,  2.29it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.24e+6]Epoch 693/3000:  23%|██▎       | 693/3000 [05:02<18:08,  2.12it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.24e+6]Epoch 693/3000:  23%|██▎       | 693/3000 [05:02<18:08,  2.12it/s, v_num=1, 
train_loss_step=2.24e+6, train_loss_epoch=2.24e+6]Epoch 694/3000:  23%|██▎       | 693/3000 [05:02<18:08,  2.12it/s, v_num=1, train_loss_step=2.24e+6, train_loss_epoch=2.24e+6]Epoch 694/3000:  23%|██▎       | 694/3000 [05:02<17:38,  2.18it/s, v_num=1, train_loss_step=2.24e+6, train_loss_epoch=2.24e+6]Epoch 694/3000:  23%|██▎       | 694/3000 [05:02<17:38,  2.18it/s, v_num=1, train_loss_step=2.32e+6, train_loss_epoch=2.24e+6]Epoch 695/3000:  23%|██▎       | 694/3000 [05:02<17:38,  2.18it/s, v_num=1, train_loss_step=2.32e+6, train_loss_epoch=2.24e+6]Epoch 695/3000:  23%|██▎       | 695/3000 [05:03<17:06,  2.25it/s, v_num=1, train_loss_step=2.32e+6, train_loss_epoch=2.24e+6]Epoch 695/3000:  23%|██▎       | 695/3000 [05:03<17:06,  2.25it/s, v_num=1, train_loss_step=2.15e+6, train_loss_epoch=2.23e+6]Epoch 696/3000:  23%|██▎       | 695/3000 [05:03<17:06,  2.25it/s, v_num=1, train_loss_step=2.15e+6, train_loss_epoch=2.23e+6]Epoch 696/3000:  23%|██▎       | 696/3000 [05:03<17:02,  2.25it/s, v_num=1, train_loss_step=2.15e+6, train_loss_epoch=2.23e+6]Epoch 696/3000:  23%|██▎       | 696/3000 [05:03<17:02,  2.25it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=2.23e+6]Epoch 697/3000:  23%|██▎       | 696/3000 [05:03<17:02,  2.25it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=2.23e+6]Epoch 697/3000:  23%|██▎       | 697/3000 [05:03<17:17,  2.22it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=2.23e+6]Epoch 697/3000:  23%|██▎       | 697/3000 [05:03<17:17,  2.22it/s, v_num=1, train_loss_step=2.25e+6, train_loss_epoch=2.23e+6]Epoch 698/3000:  23%|██▎       | 697/3000 [05:03<17:17,  2.22it/s, v_num=1, train_loss_step=2.25e+6, train_loss_epoch=2.23e+6]Epoch 698/3000:  23%|██▎       | 698/3000 [05:04<17:38,  2.17it/s, v_num=1, train_loss_step=2.25e+6, train_loss_epoch=2.23e+6]Epoch 698/3000:  23%|██▎       | 698/3000 [05:04<17:38,  2.17it/s, v_num=1, train_loss_step=2.44e+6, train_loss_epoch=2.23e+6]Epoch 699/3000:  23%|██▎       | 698/3000 [05:04<17:38,  
2.17it/s, v_num=1, train_loss_step=2.44e+6, train_loss_epoch=2.23e+6]Epoch 699/3000:  23%|██▎       | 699/3000 [05:04<17:21,  2.21it/s, v_num=1, train_loss_step=2.44e+6, train_loss_epoch=2.23e+6]Epoch 699/3000:  23%|██▎       | 699/3000 [05:04<17:21,  2.21it/s, v_num=1, train_loss_step=2.16e+6, train_loss_epoch=2.23e+6]Epoch 700/3000:  23%|██▎       | 699/3000 [05:04<17:21,  2.21it/s, v_num=1, train_loss_step=2.16e+6, train_loss_epoch=2.23e+6]Epoch 700/3000:  23%|██▎       | 700/3000 [05:05<17:40,  2.17it/s, v_num=1, train_loss_step=2.16e+6, train_loss_epoch=2.23e+6]Epoch 700/3000:  23%|██▎       | 700/3000 [05:05<17:40,  2.17it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.22e+6]Epoch 701/3000:  23%|██▎       | 700/3000 [05:05<17:40,  2.17it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.22e+6]Epoch 701/3000:  23%|██▎       | 701/3000 [05:05<17:42,  2.16it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.22e+6]Epoch 701/3000:  23%|██▎       | 701/3000 [05:05<17:42,  2.16it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.22e+6]Epoch 702/3000:  23%|██▎       | 701/3000 [05:05<17:42,  2.16it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.22e+6]Epoch 702/3000:  23%|██▎       | 702/3000 [05:06<17:43,  2.16it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.22e+6]Epoch 702/3000:  23%|██▎       | 702/3000 [05:06<17:43,  2.16it/s, v_num=1, train_loss_step=2.21e+6, train_loss_epoch=2.22e+6]Epoch 703/3000:  23%|██▎       | 702/3000 [05:06<17:43,  2.16it/s, v_num=1, train_loss_step=2.21e+6, train_loss_epoch=2.22e+6]Epoch 703/3000:  23%|██▎       | 703/3000 [05:06<16:52,  2.27it/s, v_num=1, train_loss_step=2.21e+6, train_loss_epoch=2.22e+6]Epoch 703/3000:  23%|██▎       | 703/3000 [05:06<16:52,  2.27it/s, v_num=1, train_loss_step=2.18e+6, train_loss_epoch=2.22e+6]Epoch 704/3000:  23%|██▎       | 703/3000 [05:06<16:52,  2.27it/s, v_num=1, train_loss_step=2.18e+6, train_loss_epoch=2.22e+6]Epoch 704/3000:  23%|██▎       | 
704/3000 [05:07<16:53,  2.27it/s, v_num=1, train_loss_step=2.18e+6, train_loss_epoch=2.22e+6]Epoch 704/3000:  23%|██▎       | 704/3000 [05:07<16:53,  2.27it/s, v_num=1, train_loss_step=2.24e+6, train_loss_epoch=2.22e+6]Epoch 705/3000:  23%|██▎       | 704/3000 [05:07<16:53,  2.27it/s, v_num=1, train_loss_step=2.24e+6, train_loss_epoch=2.22e+6]Epoch 705/3000:  24%|██▎       | 705/3000 [05:07<16:21,  2.34it/s, v_num=1, train_loss_step=2.24e+6, train_loss_epoch=2.22e+6]Epoch 705/3000:  24%|██▎       | 705/3000 [05:07<16:21,  2.34it/s, v_num=1, train_loss_step=2.33e+6, train_loss_epoch=2.21e+6]Epoch 706/3000:  24%|██▎       | 705/3000 [05:07<16:21,  2.34it/s, v_num=1, train_loss_step=2.33e+6, train_loss_epoch=2.21e+6]Epoch 706/3000:  24%|██▎       | 706/3000 [05:07<16:11,  2.36it/s, v_num=1, train_loss_step=2.33e+6, train_loss_epoch=2.21e+6]Epoch 706/3000:  24%|██▎       | 706/3000 [05:07<16:11,  2.36it/s, v_num=1, train_loss_step=2.29e+6, train_loss_epoch=2.21e+6]Epoch 707/3000:  24%|██▎       | 706/3000 [05:07<16:11,  2.36it/s, v_num=1, train_loss_step=2.29e+6, train_loss_epoch=2.21e+6]Epoch 707/3000:  24%|██▎       | 707/3000 [05:08<16:28,  2.32it/s, v_num=1, train_loss_step=2.29e+6, train_loss_epoch=2.21e+6]Epoch 707/3000:  24%|██▎       | 707/3000 [05:08<16:28,  2.32it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.21e+6]Epoch 708/3000:  24%|██▎       | 707/3000 [05:08<16:28,  2.32it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.21e+6]Epoch 708/3000:  24%|██▎       | 708/3000 [05:08<16:37,  2.30it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.21e+6]Epoch 708/3000:  24%|██▎       | 708/3000 [05:08<16:37,  2.30it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.21e+6]Epoch 709/3000:  24%|██▎       | 708/3000 [05:08<16:37,  2.30it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.21e+6]Epoch 709/3000:  24%|██▎       | 709/3000 [05:09<15:01,  2.54it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.21e+6]Epoch 709/3000:  
24%|██▎       | 709/3000 [05:09<15:01,  2.54it/s, v_num=1, train_loss_step=2.28e+6, train_loss_epoch=2.21e+6]Epoch 710/3000:  24%|██▎       | 709/3000 [05:09<15:01,  2.54it/s, v_num=1, train_loss_step=2.28e+6, train_loss_epoch=2.21e+6]Epoch 710/3000:  24%|██▎       | 710/3000 [05:09<15:02,  2.54it/s, v_num=1, train_loss_step=2.28e+6, train_loss_epoch=2.21e+6]Epoch 710/3000:  24%|██▎       | 710/3000 [05:09<15:02,  2.54it/s, v_num=1, train_loss_step=2.17e+6, train_loss_epoch=2.2e+6] Epoch 711/3000:  24%|██▎       | 710/3000 [05:09<15:02,  2.54it/s, v_num=1, train_loss_step=2.17e+6, train_loss_epoch=2.2e+6]Epoch 711/3000:  24%|██▎       | 711/3000 [05:09<15:46,  2.42it/s, v_num=1, train_loss_step=2.17e+6, train_loss_epoch=2.2e+6]Epoch 711/3000:  24%|██▎       | 711/3000 [05:09<15:46,  2.42it/s, v_num=1, train_loss_step=2.15e+6, train_loss_epoch=2.2e+6]Epoch 712/3000:  24%|██▎       | 711/3000 [05:09<15:46,  2.42it/s, v_num=1, train_loss_step=2.15e+6, train_loss_epoch=2.2e+6]Epoch 712/3000:  24%|██▎       | 712/3000 [05:10<15:57,  2.39it/s, v_num=1, train_loss_step=2.15e+6, train_loss_epoch=2.2e+6]Epoch 712/3000:  24%|██▎       | 712/3000 [05:10<15:57,  2.39it/s, v_num=1, train_loss_step=2.19e+6, train_loss_epoch=2.2e+6]Epoch 713/3000:  24%|██▎       | 712/3000 [05:10<15:57,  2.39it/s, v_num=1, train_loss_step=2.19e+6, train_loss_epoch=2.2e+6]Epoch 713/3000:  24%|██▍       | 713/3000 [05:10<16:08,  2.36it/s, v_num=1, train_loss_step=2.19e+6, train_loss_epoch=2.2e+6]Epoch 713/3000:  24%|██▍       | 713/3000 [05:10<16:08,  2.36it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.2e+6]Epoch 714/3000:  24%|██▍       | 713/3000 [05:10<16:08,  2.36it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.2e+6]Epoch 714/3000:  24%|██▍       | 714/3000 [05:11<16:25,  2.32it/s, v_num=1, train_loss_step=2.27e+6, train_loss_epoch=2.2e+6]Epoch 714/3000:  24%|██▍       | 714/3000 [05:11<16:25,  2.32it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2.2e+6]Epoch 
715/3000:  24%|██▍       | 714/3000 [05:11<16:25,  2.32it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2.2e+6]Epoch 715/3000:  24%|██▍       | 715/3000 [05:11<16:29,  2.31it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2.2e+6]Epoch 715/3000:  24%|██▍       | 715/3000 [05:11<16:29,  2.31it/s, v_num=1, train_loss_step=2.19e+6, train_loss_epoch=2.19e+6]Epoch 716/3000:  24%|██▍       | 715/3000 [05:11<16:29,  2.31it/s, v_num=1, train_loss_step=2.19e+6, train_loss_epoch=2.19e+6]Epoch 716/3000:  24%|██▍       | 716/3000 [05:12<16:41,  2.28it/s, v_num=1, train_loss_step=2.19e+6, train_loss_epoch=2.19e+6]Epoch 716/3000:  24%|██▍       | 716/3000 [05:12<16:41,  2.28it/s, v_num=1, train_loss_step=2.28e+6, train_loss_epoch=2.19e+6]Epoch 717/3000:  24%|██▍       | 716/3000 [05:12<16:41,  2.28it/s, v_num=1, train_loss_step=2.28e+6, train_loss_epoch=2.19e+6]Epoch 717/3000:  24%|██▍       | 717/3000 [05:12<16:17,  2.33it/s, v_num=1, train_loss_step=2.28e+6, train_loss_epoch=2.19e+6]Epoch 717/3000:  24%|██▍       | 717/3000 [05:12<16:17,  2.33it/s, v_num=1, train_loss_step=2.16e+6, train_loss_epoch=2.19e+6]Epoch 718/3000:  24%|██▍       | 717/3000 [05:12<16:17,  2.33it/s, v_num=1, train_loss_step=2.16e+6, train_loss_epoch=2.19e+6]Epoch 718/3000:  24%|██▍       | 718/3000 [05:13<17:26,  2.18it/s, v_num=1, train_loss_step=2.16e+6, train_loss_epoch=2.19e+6]Epoch 718/3000:  24%|██▍       | 718/3000 [05:13<17:26,  2.18it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.19e+6]Epoch 719/3000:  24%|██▍       | 718/3000 [05:13<17:26,  2.18it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.19e+6]Epoch 719/3000:  24%|██▍       | 719/3000 [05:13<17:25,  2.18it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.19e+6]Epoch 719/3000:  24%|██▍       | 719/3000 [05:13<17:25,  2.18it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.19e+6]Epoch 720/3000:  24%|██▍       | 719/3000 [05:13<17:25,  2.18it/s, v_num=1, train_loss_step=2.14e+6, 
train_loss_epoch=2.19e+6]Epoch 720/3000:  24%|██▍       | 720/3000 [05:13<16:56,  2.24it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.19e+6]Epoch 720/3000:  24%|██▍       | 720/3000 [05:13<16:56,  2.24it/s, v_num=1, train_loss_step=2.21e+6, train_loss_epoch=2.19e+6]Epoch 721/3000:  24%|██▍       | 720/3000 [05:13<16:56,  2.24it/s, v_num=1, train_loss_step=2.21e+6, train_loss_epoch=2.19e+6]Epoch 721/3000:  24%|██▍       | 721/3000 [05:14<17:06,  2.22it/s, v_num=1, train_loss_step=2.21e+6, train_loss_epoch=2.19e+6]Epoch 721/3000:  24%|██▍       | 721/3000 [05:14<17:06,  2.22it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.18e+6]Epoch 722/3000:  24%|██▍       | 721/3000 [05:14<17:06,  2.22it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.18e+6]Epoch 722/3000:  24%|██▍       | 722/3000 [05:14<16:30,  2.30it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.18e+6]Epoch 722/3000:  24%|██▍       | 722/3000 [05:14<16:30,  2.30it/s, v_num=1, train_loss_step=2.24e+6, train_loss_epoch=2.18e+6]Epoch 723/3000:  24%|██▍       | 722/3000 [05:14<16:30,  2.30it/s, v_num=1, train_loss_step=2.24e+6, train_loss_epoch=2.18e+6]Epoch 723/3000:  24%|██▍       | 723/3000 [05:15<16:45,  2.27it/s, v_num=1, train_loss_step=2.24e+6, train_loss_epoch=2.18e+6]Epoch 723/3000:  24%|██▍       | 723/3000 [05:15<16:45,  2.27it/s, v_num=1, train_loss_step=2.1e+6, train_loss_epoch=2.18e+6] Epoch 724/3000:  24%|██▍       | 723/3000 [05:15<16:45,  2.27it/s, v_num=1, train_loss_step=2.1e+6, train_loss_epoch=2.18e+6]Epoch 724/3000:  24%|██▍       | 724/3000 [05:15<16:28,  2.30it/s, v_num=1, train_loss_step=2.1e+6, train_loss_epoch=2.18e+6]Epoch 724/3000:  24%|██▍       | 724/3000 [05:15<16:28,  2.30it/s, v_num=1, train_loss_step=2.1e+6, train_loss_epoch=2.18e+6]Epoch 725/3000:  24%|██▍       | 724/3000 [05:15<16:28,  2.30it/s, v_num=1, train_loss_step=2.1e+6, train_loss_epoch=2.18e+6]Epoch 725/3000:  24%|██▍       | 725/3000 [05:16<16:47,  2.26it/s, v_num=1, 
train_loss_step=2.1e+6, train_loss_epoch=2.18e+6]Epoch 725/3000:  24%|██▍       | 725/3000 [05:16<16:47,  2.26it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.18e+6]Epoch 726/3000:  24%|██▍       | 725/3000 [05:16<16:47,  2.26it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.18e+6]Epoch 726/3000:  24%|██▍       | 726/3000 [05:16<17:12,  2.20it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.18e+6]Epoch 726/3000:  24%|██▍       | 726/3000 [05:16<17:12,  2.20it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.17e+6]Epoch 727/3000:  24%|██▍       | 726/3000 [05:16<17:12,  2.20it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.17e+6]Epoch 727/3000:  24%|██▍       | 727/3000 [05:17<17:01,  2.23it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.17e+6]Epoch 727/3000:  24%|██▍       | 727/3000 [05:17<17:01,  2.23it/s, v_num=1, train_loss_step=2.17e+6, train_loss_epoch=2.17e+6]Epoch 728/3000:  24%|██▍       | 727/3000 [05:17<17:01,  2.23it/s, v_num=1, train_loss_step=2.17e+6, train_loss_epoch=2.17e+6]Epoch 728/3000:  24%|██▍       | 728/3000 [05:17<17:09,  2.21it/s, v_num=1, train_loss_step=2.17e+6, train_loss_epoch=2.17e+6]Epoch 728/3000:  24%|██▍       | 728/3000 [05:17<17:09,  2.21it/s, v_num=1, train_loss_step=2.16e+6, train_loss_epoch=2.17e+6]Epoch 729/3000:  24%|██▍       | 728/3000 [05:17<17:09,  2.21it/s, v_num=1, train_loss_step=2.16e+6, train_loss_epoch=2.17e+6]Epoch 729/3000:  24%|██▍       | 729/3000 [05:17<17:09,  2.21it/s, v_num=1, train_loss_step=2.16e+6, train_loss_epoch=2.17e+6]Epoch 729/3000:  24%|██▍       | 729/3000 [05:17<17:09,  2.21it/s, v_num=1, train_loss_step=2.16e+6, train_loss_epoch=2.17e+6]Epoch 730/3000:  24%|██▍       | 729/3000 [05:17<17:09,  2.21it/s, v_num=1, train_loss_step=2.16e+6, train_loss_epoch=2.17e+6]Epoch 730/3000:  24%|██▍       | 730/3000 [05:18<18:10,  2.08it/s, v_num=1, train_loss_step=2.16e+6, train_loss_epoch=2.17e+6]Epoch 730/3000:  24%|██▍       | 730/3000 [05:18<18:10,  
2.08it/s, v_num=1, train_loss_step=2.28e+6, train_loss_epoch=2.17e+6]Epoch 731/3000:  24%|██▍       | 730/3000 [05:18<18:10,  2.08it/s, v_num=1, train_loss_step=2.28e+6, train_loss_epoch=2.17e+6]Epoch 731/3000:  24%|██▍       | 731/3000 [05:19<18:12,  2.08it/s, v_num=1, train_loss_step=2.28e+6, train_loss_epoch=2.17e+6]Epoch 731/3000:  24%|██▍       | 731/3000 [05:19<18:12,  2.08it/s, v_num=1, train_loss_step=2.19e+6, train_loss_epoch=2.17e+6]Epoch 732/3000:  24%|██▍       | 731/3000 [05:19<18:12,  2.08it/s, v_num=1, train_loss_step=2.19e+6, train_loss_epoch=2.17e+6]Epoch 732/3000:  24%|██▍       | 732/3000 [05:19<18:18,  2.06it/s, v_num=1, train_loss_step=2.19e+6, train_loss_epoch=2.17e+6]Epoch 732/3000:  24%|██▍       | 732/3000 [05:19<18:18,  2.06it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.16e+6]Epoch 733/3000:  24%|██▍       | 732/3000 [05:19<18:18,  2.06it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.16e+6]Epoch 733/3000:  24%|██▍       | 733/3000 [05:19<17:43,  2.13it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.16e+6]Epoch 733/3000:  24%|██▍       | 733/3000 [05:19<17:43,  2.13it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=2.16e+6]Epoch 734/3000:  24%|██▍       | 733/3000 [05:19<17:43,  2.13it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=2.16e+6]Epoch 734/3000:  24%|██▍       | 734/3000 [05:20<17:12,  2.19it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=2.16e+6]Epoch 734/3000:  24%|██▍       | 734/3000 [05:20<17:12,  2.19it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.16e+6]Epoch 735/3000:  24%|██▍       | 734/3000 [05:20<17:12,  2.19it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.16e+6]Epoch 735/3000:  24%|██▍       | 735/3000 [05:20<16:39,  2.27it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.16e+6]Epoch 735/3000:  24%|██▍       | 735/3000 [05:20<16:39,  2.27it/s, v_num=1, train_loss_step=2.12e+6, train_loss_epoch=2.16e+6]Epoch 736/3000:  24%|██▍       | 
735/3000 [05:20<16:39,  2.27it/s, v_num=1, train_loss_step=2.12e+6, train_loss_epoch=2.16e+6]Epoch 736/3000:  25%|██▍       | 736/3000 [05:21<16:10,  2.33it/s, v_num=1, train_loss_step=2.12e+6, train_loss_epoch=2.16e+6]Epoch 736/3000:  25%|██▍       | 736/3000 [05:21<16:10,  2.33it/s, v_num=1, train_loss_step=2.1e+6, train_loss_epoch=2.16e+6] Epoch 737/3000:  25%|██▍       | 736/3000 [05:21<16:10,  2.33it/s, v_num=1, train_loss_step=2.1e+6, train_loss_epoch=2.16e+6]Epoch 737/3000:  25%|██▍       | 737/3000 [05:21<16:28,  2.29it/s, v_num=1, train_loss_step=2.1e+6, train_loss_epoch=2.16e+6]Epoch 737/3000:  25%|██▍       | 737/3000 [05:21<16:28,  2.29it/s, v_num=1, train_loss_step=2.16e+6, train_loss_epoch=2.15e+6]Epoch 738/3000:  25%|██▍       | 737/3000 [05:21<16:28,  2.29it/s, v_num=1, train_loss_step=2.16e+6, train_loss_epoch=2.15e+6]Epoch 738/3000:  25%|██▍       | 738/3000 [05:22<16:40,  2.26it/s, v_num=1, train_loss_step=2.16e+6, train_loss_epoch=2.15e+6]Epoch 738/3000:  25%|██▍       | 738/3000 [05:22<16:40,  2.26it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=2.15e+6]Epoch 739/3000:  25%|██▍       | 738/3000 [05:22<16:40,  2.26it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=2.15e+6]Epoch 739/3000:  25%|██▍       | 739/3000 [05:22<16:10,  2.33it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=2.15e+6]Epoch 739/3000:  25%|██▍       | 739/3000 [05:22<16:10,  2.33it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=2.15e+6]   Epoch 740/3000:  25%|██▍       | 739/3000 [05:22<16:10,  2.33it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=2.15e+6]Epoch 740/3000:  25%|██▍       | 740/3000 [05:22<15:43,  2.40it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=2.15e+6]Epoch 740/3000:  25%|██▍       | 740/3000 [05:22<15:43,  2.40it/s, v_num=1, train_loss_step=2.21e+6, train_loss_epoch=2.15e+6]Epoch 741/3000:  25%|██▍       | 740/3000 [05:22<15:43,  2.40it/s, v_num=1, train_loss_step=2.21e+6, train_loss_epoch=2.15e+6]Epoch 741/3000:  25%|██▍ 
      | 741/3000 [05:23<16:44,  2.25it/s, v_num=1, train_loss_step=2.21e+6, train_loss_epoch=2.15e+6]Epoch 741/3000:  25%|██▍       | 741/3000 [05:23<16:44,  2.25it/s, v_num=1, train_loss_step=2.08e+6, train_loss_epoch=2.15e+6]Epoch 742/3000:  25%|██▍       | 741/3000 [05:23<16:44,  2.25it/s, v_num=1, train_loss_step=2.08e+6, train_loss_epoch=2.15e+6]Epoch 742/3000:  25%|██▍       | 742/3000 [05:23<16:47,  2.24it/s, v_num=1, train_loss_step=2.08e+6, train_loss_epoch=2.15e+6]Epoch 742/3000:  25%|██▍       | 742/3000 [05:23<16:47,  2.24it/s, v_num=1, train_loss_step=2.15e+6, train_loss_epoch=2.15e+6]Epoch 743/3000:  25%|██▍       | 742/3000 [05:23<16:47,  2.24it/s, v_num=1, train_loss_step=2.15e+6, train_loss_epoch=2.15e+6]Epoch 743/3000:  25%|██▍       | 743/3000 [05:24<17:30,  2.15it/s, v_num=1, train_loss_step=2.15e+6, train_loss_epoch=2.15e+6]Epoch 743/3000:  25%|██▍       | 743/3000 [05:24<17:30,  2.15it/s, v_num=1, train_loss_step=2.18e+6, train_loss_epoch=2.14e+6]Epoch 744/3000:  25%|██▍       | 743/3000 [05:24<17:30,  2.15it/s, v_num=1, train_loss_step=2.18e+6, train_loss_epoch=2.14e+6]Epoch 744/3000:  25%|██▍       | 744/3000 [05:24<17:32,  2.14it/s, v_num=1, train_loss_step=2.18e+6, train_loss_epoch=2.14e+6]Epoch 744/3000:  25%|██▍       | 744/3000 [05:24<17:32,  2.14it/s, v_num=1, train_loss_step=2.18e+6, train_loss_epoch=2.14e+6]Epoch 745/3000:  25%|██▍       | 744/3000 [05:24<17:32,  2.14it/s, v_num=1, train_loss_step=2.18e+6, train_loss_epoch=2.14e+6]Epoch 745/3000:  25%|██▍       | 745/3000 [05:25<17:30,  2.15it/s, v_num=1, train_loss_step=2.18e+6, train_loss_epoch=2.14e+6]Epoch 745/3000:  25%|██▍       | 745/3000 [05:25<17:30,  2.15it/s, v_num=1, train_loss_step=2.17e+6, train_loss_epoch=2.14e+6]Epoch 746/3000:  25%|██▍       | 745/3000 [05:25<17:30,  2.15it/s, v_num=1, train_loss_step=2.17e+6, train_loss_epoch=2.14e+6]Epoch 746/3000:  25%|██▍       | 746/3000 [05:25<17:34,  2.14it/s, v_num=1, train_loss_step=2.17e+6, train_loss_epoch=2.14e+6]Epoch 
746/3000:  25%|██▍       | 746/3000 [05:25<17:34,  2.14it/s, v_num=1, train_loss_step=2.16e+6, train_loss_epoch=2.14e+6]Epoch 747/3000:  25%|██▍       | 746/3000 [05:25<17:34,  2.14it/s, v_num=1, train_loss_step=2.16e+6, train_loss_epoch=2.14e+6]Epoch 747/3000:  25%|██▍       | 747/3000 [05:26<17:41,  2.12it/s, v_num=1, train_loss_step=2.16e+6, train_loss_epoch=2.14e+6]Epoch 747/3000:  25%|██▍       | 747/3000 [05:26<17:41,  2.12it/s, v_num=1, train_loss_step=2.33e+6, train_loss_epoch=2.14e+6]Epoch 748/3000:  25%|██▍       | 747/3000 [05:26<17:41,  2.12it/s, v_num=1, train_loss_step=2.33e+6, train_loss_epoch=2.14e+6]Epoch 748/3000:  25%|██▍       | 748/3000 [05:26<17:03,  2.20it/s, v_num=1, train_loss_step=2.33e+6, train_loss_epoch=2.14e+6]Epoch 748/3000:  25%|██▍       | 748/3000 [05:26<17:03,  2.20it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.14e+6]Epoch 749/3000:  25%|██▍       | 748/3000 [05:26<17:03,  2.20it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.14e+6]Epoch 749/3000:  25%|██▍       | 749/3000 [05:27<16:51,  2.23it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.14e+6]Epoch 749/3000:  25%|██▍       | 749/3000 [05:27<16:51,  2.23it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.13e+6]Epoch 750/3000:  25%|██▍       | 749/3000 [05:27<16:51,  2.23it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.13e+6]Epoch 750/3000:  25%|██▌       | 750/3000 [05:27<16:59,  2.21it/s, v_num=1, train_loss_step=2.35e+6, train_loss_epoch=2.13e+6]Epoch 750/3000:  25%|██▌       | 750/3000 [05:27<16:59,  2.21it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.13e+6]Epoch 751/3000:  25%|██▌       | 750/3000 [05:27<16:59,  2.21it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.13e+6]Epoch 751/3000:  25%|██▌       | 751/3000 [05:28<17:30,  2.14it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.13e+6]Epoch 751/3000:  25%|██▌       | 751/3000 [05:28<17:30,  2.14it/s, v_num=1, train_loss_step=2.11e+6, 
train_loss_epoch=2.13e+6]Epoch 752/3000:  25%|██▌       | 751/3000 [05:28<17:30,  2.14it/s, v_num=1, train_loss_step=2.11e+6, train_loss_epoch=2.13e+6]Epoch 752/3000:  25%|██▌       | 752/3000 [05:28<17:32,  2.14it/s, v_num=1, train_loss_step=2.11e+6, train_loss_epoch=2.13e+6]Epoch 752/3000:  25%|██▌       | 752/3000 [05:28<17:32,  2.14it/s, v_num=1, train_loss_step=2.11e+6, train_loss_epoch=2.13e+6]Epoch 753/3000:  25%|██▌       | 752/3000 [05:28<17:32,  2.14it/s, v_num=1, train_loss_step=2.11e+6, train_loss_epoch=2.13e+6]Epoch 753/3000:  25%|██▌       | 753/3000 [05:28<17:22,  2.16it/s, v_num=1, train_loss_step=2.11e+6, train_loss_epoch=2.13e+6]Epoch 753/3000:  25%|██▌       | 753/3000 [05:28<17:22,  2.16it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.13e+6]Epoch 754/3000:  25%|██▌       | 753/3000 [05:28<17:22,  2.16it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.13e+6]Epoch 754/3000:  25%|██▌       | 754/3000 [05:29<17:34,  2.13it/s, v_num=1, train_loss_step=2.22e+6, train_loss_epoch=2.13e+6]Epoch 754/3000:  25%|██▌       | 754/3000 [05:29<17:34,  2.13it/s, v_num=1, train_loss_step=2.21e+6, train_loss_epoch=2.13e+6]Epoch 755/3000:  25%|██▌       | 754/3000 [05:29<17:34,  2.13it/s, v_num=1, train_loss_step=2.21e+6, train_loss_epoch=2.13e+6]Epoch 755/3000:  25%|██▌       | 755/3000 [05:29<16:42,  2.24it/s, v_num=1, train_loss_step=2.21e+6, train_loss_epoch=2.13e+6]Epoch 755/3000:  25%|██▌       | 755/3000 [05:29<16:42,  2.24it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.12e+6]Epoch 756/3000:  25%|██▌       | 755/3000 [05:29<16:42,  2.24it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.12e+6]Epoch 756/3000:  25%|██▌       | 756/3000 [05:30<15:57,  2.34it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.12e+6]Epoch 756/3000:  25%|██▌       | 756/3000 [05:30<15:57,  2.34it/s, v_num=1, train_loss_step=2.16e+6, train_loss_epoch=2.12e+6]Epoch 757/3000:  25%|██▌       | 756/3000 [05:30<15:57,  2.34it/s, v_num=1, 
train_loss_step=2.16e+6, train_loss_epoch=2.12e+6]Epoch 757/3000:  25%|██▌       | 757/3000 [05:30<15:49,  2.36it/s, v_num=1, train_loss_step=2.16e+6, train_loss_epoch=2.12e+6]Epoch 757/3000:  25%|██▌       | 757/3000 [05:30<15:49,  2.36it/s, v_num=1, train_loss_step=2.12e+6, train_loss_epoch=2.12e+6]Epoch 758/3000:  25%|██▌       | 757/3000 [05:30<15:49,  2.36it/s, v_num=1, train_loss_step=2.12e+6, train_loss_epoch=2.12e+6]Epoch 758/3000:  25%|██▌       | 758/3000 [05:31<15:19,  2.44it/s, v_num=1, train_loss_step=2.12e+6, train_loss_epoch=2.12e+6]Epoch 758/3000:  25%|██▌       | 758/3000 [05:31<15:19,  2.44it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=2.12e+6]Epoch 759/3000:  25%|██▌       | 758/3000 [05:31<15:19,  2.44it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=2.12e+6]Epoch 759/3000:  25%|██▌       | 759/3000 [05:31<15:59,  2.34it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=2.12e+6]Epoch 759/3000:  25%|██▌       | 759/3000 [05:31<15:59,  2.34it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=2.12e+6]Epoch 760/3000:  25%|██▌       | 759/3000 [05:31<15:59,  2.34it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=2.12e+6]Epoch 760/3000:  25%|██▌       | 760/3000 [05:31<14:50,  2.52it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=2.12e+6]Epoch 760/3000:  25%|██▌       | 760/3000 [05:31<14:50,  2.52it/s, v_num=1, train_loss_step=2.19e+6, train_loss_epoch=2.12e+6]Epoch 761/3000:  25%|██▌       | 760/3000 [05:31<14:50,  2.52it/s, v_num=1, train_loss_step=2.19e+6, train_loss_epoch=2.12e+6]Epoch 761/3000:  25%|██▌       | 761/3000 [05:32<15:37,  2.39it/s, v_num=1, train_loss_step=2.19e+6, train_loss_epoch=2.12e+6]Epoch 761/3000:  25%|██▌       | 761/3000 [05:32<15:37,  2.39it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.11e+6]Epoch 762/3000:  25%|██▌       | 761/3000 [05:32<15:37,  2.39it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.11e+6]Epoch 762/3000:  25%|██▌       | 762/3000 [05:32<15:53,  
2.35it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.11e+6]Epoch 762/3000:  25%|██▌       | 762/3000 [05:32<15:53,  2.35it/s, v_num=1, train_loss_step=2.15e+6, train_loss_epoch=2.11e+6]Epoch 763/3000:  25%|██▌       | 762/3000 [05:32<15:53,  2.35it/s, v_num=1, train_loss_step=2.15e+6, train_loss_epoch=2.11e+6]Epoch 763/3000:  25%|██▌       | 763/3000 [05:33<16:10,  2.31it/s, v_num=1, train_loss_step=2.15e+6, train_loss_epoch=2.11e+6]Epoch 763/3000:  25%|██▌       | 763/3000 [05:33<16:10,  2.31it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=2.11e+6]Epoch 764/3000:  25%|██▌       | 763/3000 [05:33<16:10,  2.31it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=2.11e+6]Epoch 764/3000:  25%|██▌       | 764/3000 [05:33<16:20,  2.28it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=2.11e+6]Epoch 764/3000:  25%|██▌       | 764/3000 [05:33<16:20,  2.28it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2.11e+6]Epoch 765/3000:  25%|██▌       | 764/3000 [05:33<16:20,  2.28it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2.11e+6]Epoch 765/3000:  26%|██▌       | 765/3000 [05:34<15:51,  2.35it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2.11e+6]Epoch 765/3000:  26%|██▌       | 765/3000 [05:34<15:51,  2.35it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2.11e+6]Epoch 766/3000:  26%|██▌       | 765/3000 [05:34<15:51,  2.35it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2.11e+6]Epoch 766/3000:  26%|██▌       | 766/3000 [05:34<16:36,  2.24it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2.11e+6]Epoch 766/3000:  26%|██▌       | 766/3000 [05:34<16:36,  2.24it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=2.11e+6]Epoch 767/3000:  26%|██▌       | 766/3000 [05:34<16:36,  2.24it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=2.11e+6]Epoch 767/3000:  26%|██▌       | 767/3000 [05:34<16:34,  2.25it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=2.11e+6]Epoch 767/3000:  26%|██▌       | 
767/3000 [05:34<16:34,  2.25it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.1e+6] Epoch 768/3000:  26%|██▌       | 767/3000 [05:34<16:34,  2.25it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.1e+6]Epoch 768/3000:  26%|██▌       | 768/3000 [05:35<15:38,  2.38it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.1e+6]Epoch 768/3000:  26%|██▌       | 768/3000 [05:35<15:38,  2.38it/s, v_num=1, train_loss_step=2.18e+6, train_loss_epoch=2.1e+6]Epoch 769/3000:  26%|██▌       | 768/3000 [05:35<15:38,  2.38it/s, v_num=1, train_loss_step=2.18e+6, train_loss_epoch=2.1e+6]Epoch 769/3000:  26%|██▌       | 769/3000 [05:35<15:43,  2.36it/s, v_num=1, train_loss_step=2.18e+6, train_loss_epoch=2.1e+6]Epoch 769/3000:  26%|██▌       | 769/3000 [05:35<15:43,  2.36it/s, v_num=1, train_loss_step=2.24e+6, train_loss_epoch=2.1e+6]Epoch 770/3000:  26%|██▌       | 769/3000 [05:35<15:43,  2.36it/s, v_num=1, train_loss_step=2.24e+6, train_loss_epoch=2.1e+6]Epoch 770/3000:  26%|██▌       | 770/3000 [05:36<16:14,  2.29it/s, v_num=1, train_loss_step=2.24e+6, train_loss_epoch=2.1e+6]Epoch 770/3000:  26%|██▌       | 770/3000 [05:36<16:14,  2.29it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=2.1e+6]Epoch 771/3000:  26%|██▌       | 770/3000 [05:36<16:14,  2.29it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=2.1e+6]Epoch 771/3000:  26%|██▌       | 771/3000 [05:36<16:44,  2.22it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=2.1e+6]Epoch 771/3000:  26%|██▌       | 771/3000 [05:36<16:44,  2.22it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=2.1e+6]Epoch 772/3000:  26%|██▌       | 771/3000 [05:36<16:44,  2.22it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=2.1e+6]Epoch 772/3000:  26%|██▌       | 772/3000 [05:37<16:28,  2.25it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=2.1e+6]Epoch 772/3000:  26%|██▌       | 772/3000 [05:37<16:28,  2.25it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2.1e+6]Epoch 773/3000:  26%|██▌       
| 772/3000 [05:37<16:28,  2.25it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2.1e+6]Epoch 773/3000:  26%|██▌       | 773/3000 [05:37<16:35,  2.24it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2.1e+6]Epoch 773/3000:  26%|██▌       | 773/3000 [05:37<16:35,  2.24it/s, v_num=1, train_loss_step=2.12e+6, train_loss_epoch=2.1e+6]Epoch 774/3000:  26%|██▌       | 773/3000 [05:37<16:35,  2.24it/s, v_num=1, train_loss_step=2.12e+6, train_loss_epoch=2.1e+6]Epoch 774/3000:  26%|██▌       | 774/3000 [05:38<16:15,  2.28it/s, v_num=1, train_loss_step=2.12e+6, train_loss_epoch=2.1e+6]Epoch 774/3000:  26%|██▌       | 774/3000 [05:38<16:15,  2.28it/s, v_num=1, train_loss_step=2.06e+6, train_loss_epoch=2.09e+6]Epoch 775/3000:  26%|██▌       | 774/3000 [05:38<16:15,  2.28it/s, v_num=1, train_loss_step=2.06e+6, train_loss_epoch=2.09e+6]Epoch 775/3000:  26%|██▌       | 775/3000 [05:38<16:26,  2.25it/s, v_num=1, train_loss_step=2.06e+6, train_loss_epoch=2.09e+6]Epoch 775/3000:  26%|██▌       | 775/3000 [05:38<16:26,  2.25it/s, v_num=1, train_loss_step=2.15e+6, train_loss_epoch=2.09e+6]Epoch 776/3000:  26%|██▌       | 775/3000 [05:38<16:26,  2.25it/s, v_num=1, train_loss_step=2.15e+6, train_loss_epoch=2.09e+6]Epoch 776/3000:  26%|██▌       | 776/3000 [05:38<16:29,  2.25it/s, v_num=1, train_loss_step=2.15e+6, train_loss_epoch=2.09e+6]Epoch 776/3000:  26%|██▌       | 776/3000 [05:38<16:29,  2.25it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.09e+6]Epoch 777/3000:  26%|██▌       | 776/3000 [05:38<16:29,  2.25it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.09e+6]Epoch 777/3000:  26%|██▌       | 777/3000 [05:39<16:39,  2.22it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.09e+6]Epoch 777/3000:  26%|██▌       | 777/3000 [05:39<16:39,  2.22it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=2.09e+6]Epoch 778/3000:  26%|██▌       | 777/3000 [05:39<16:39,  2.22it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=2.09e+6]Epoch 778/3000:  
26%|██▌       | 778/3000 [05:39<17:10,  2.16it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=2.09e+6]Epoch 778/3000:  26%|██▌       | 778/3000 [05:39<17:10,  2.16it/s, v_num=1, train_loss_step=2.23e+6, train_loss_epoch=2.09e+6]Epoch 779/3000:  26%|██▌       | 778/3000 [05:39<17:10,  2.16it/s, v_num=1, train_loss_step=2.23e+6, train_loss_epoch=2.09e+6]Epoch 779/3000:  26%|██▌       | 779/3000 [05:40<16:35,  2.23it/s, v_num=1, train_loss_step=2.23e+6, train_loss_epoch=2.09e+6]Epoch 779/3000:  26%|██▌       | 779/3000 [05:40<16:35,  2.23it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.09e+6]Epoch 780/3000:  26%|██▌       | 779/3000 [05:40<16:35,  2.23it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.09e+6]Epoch 780/3000:  26%|██▌       | 780/3000 [05:40<16:45,  2.21it/s, v_num=1, train_loss_step=2.14e+6, train_loss_epoch=2.09e+6]Epoch 780/3000:  26%|██▌       | 780/3000 [05:40<16:45,  2.21it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.08e+6]Epoch 781/3000:  26%|██▌       | 780/3000 [05:40<16:45,  2.21it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.08e+6]Epoch 781/3000:  26%|██▌       | 781/3000 [05:41<16:22,  2.26it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.08e+6]Epoch 781/3000:  26%|██▌       | 781/3000 [05:41<16:22,  2.26it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=2.08e+6]Epoch 782/3000:  26%|██▌       | 781/3000 [05:41<16:22,  2.26it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=2.08e+6]Epoch 782/3000:  26%|██▌       | 782/3000 [05:41<15:46,  2.34it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=2.08e+6]Epoch 782/3000:  26%|██▌       | 782/3000 [05:41<15:46,  2.34it/s, v_num=1, train_loss_step=2.17e+6, train_loss_epoch=2.08e+6]Epoch 783/3000:  26%|██▌       | 782/3000 [05:41<15:46,  2.34it/s, v_num=1, train_loss_step=2.17e+6, train_loss_epoch=2.08e+6]Epoch 783/3000:  26%|██▌       | 783/3000 [05:41<15:01,  2.46it/s, v_num=1, train_loss_step=2.17e+6, 
train_loss_epoch=2.08e+6]Epoch 783/3000:  26%|██▌       | 783/3000 [05:41<15:01,  2.46it/s, v_num=1, train_loss_step=2.1e+6, train_loss_epoch=2.08e+6] Epoch 784/3000:  26%|██▌       | 783/3000 [05:41<15:01,  2.46it/s, v_num=1, train_loss_step=2.1e+6, train_loss_epoch=2.08e+6]Epoch 784/3000:  26%|██▌       | 784/3000 [05:42<15:07,  2.44it/s, v_num=1, train_loss_step=2.1e+6, train_loss_epoch=2.08e+6]Epoch 784/3000:  26%|██▌       | 784/3000 [05:42<15:07,  2.44it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=2.08e+6]Epoch 785/3000:  26%|██▌       | 784/3000 [05:42<15:07,  2.44it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=2.08e+6]Epoch 785/3000:  26%|██▌       | 785/3000 [05:42<16:01,  2.30it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=2.08e+6]Epoch 785/3000:  26%|██▌       | 785/3000 [05:42<16:01,  2.30it/s, v_num=1, train_loss_step=2.11e+6, train_loss_epoch=2.08e+6]Epoch 786/3000:  26%|██▌       | 785/3000 [05:42<16:01,  2.30it/s, v_num=1, train_loss_step=2.11e+6, train_loss_epoch=2.08e+6]Epoch 786/3000:  26%|██▌       | 786/3000 [05:43<16:38,  2.22it/s, v_num=1, train_loss_step=2.11e+6, train_loss_epoch=2.08e+6]Epoch 786/3000:  26%|██▌       | 786/3000 [05:43<16:38,  2.22it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=2.08e+6]Epoch 787/3000:  26%|██▌       | 786/3000 [05:43<16:38,  2.22it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=2.08e+6]Epoch 787/3000:  26%|██▌       | 787/3000 [05:43<16:16,  2.27it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=2.08e+6]Epoch 787/3000:  26%|██▌       | 787/3000 [05:43<16:16,  2.27it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.07e+6]Epoch 788/3000:  26%|██▌       | 787/3000 [05:43<16:16,  2.27it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.07e+6]Epoch 788/3000:  26%|██▋       | 788/3000 [05:44<15:48,  2.33it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.07e+6]Epoch 788/3000:  26%|██▋       | 788/3000 [05:44<15:48,  2.33it/s, v_num=1, 
train_loss_step=2.03e+6, train_loss_epoch=2.07e+6]Epoch 789/3000:  26%|██▋       | 788/3000 [05:44<15:48,  2.33it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=2.07e+6]Epoch 789/3000:  26%|██▋       | 789/3000 [05:44<16:25,  2.24it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=2.07e+6]Epoch 789/3000:  26%|██▋       | 789/3000 [05:44<16:25,  2.24it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=2.07e+6]Epoch 790/3000:  26%|██▋       | 789/3000 [05:44<16:25,  2.24it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=2.07e+6]Epoch 790/3000:  26%|██▋       | 790/3000 [05:44<15:41,  2.35it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=2.07e+6]Epoch 790/3000:  26%|██▋       | 790/3000 [05:44<15:41,  2.35it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.07e+6]Epoch 791/3000:  26%|██▋       | 790/3000 [05:44<15:41,  2.35it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.07e+6]Epoch 791/3000:  26%|██▋       | 791/3000 [05:45<15:36,  2.36it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.07e+6]Epoch 791/3000:  26%|██▋       | 791/3000 [05:45<15:36,  2.36it/s, v_num=1, train_loss_step=2.1e+6, train_loss_epoch=2.07e+6] Epoch 792/3000:  26%|██▋       | 791/3000 [05:45<15:36,  2.36it/s, v_num=1, train_loss_step=2.1e+6, train_loss_epoch=2.07e+6]Epoch 792/3000:  26%|██▋       | 792/3000 [05:45<16:54,  2.18it/s, v_num=1, train_loss_step=2.1e+6, train_loss_epoch=2.07e+6]Epoch 792/3000:  26%|██▋       | 792/3000 [05:45<16:54,  2.18it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=2.07e+6]Epoch 793/3000:  26%|██▋       | 792/3000 [05:45<16:54,  2.18it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=2.07e+6]Epoch 793/3000:  26%|██▋       | 793/3000 [05:46<16:45,  2.20it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=2.07e+6]Epoch 793/3000:  26%|██▋       | 793/3000 [05:46<16:45,  2.20it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=2.07e+6]Epoch 794/3000:  26%|██▋       | 793/3000 [05:46<16:45,  
2.20it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=2.07e+6]Epoch 794/3000:  26%|██▋       | 794/3000 [05:46<16:39,  2.21it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=2.07e+6]Epoch 794/3000:  26%|██▋       | 794/3000 [05:46<16:39,  2.21it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=2.06e+6]Epoch 795/3000:  26%|██▋       | 794/3000 [05:46<16:39,  2.21it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=2.06e+6]Epoch 795/3000:  26%|██▋       | 795/3000 [05:47<15:49,  2.32it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=2.06e+6]Epoch 795/3000:  26%|██▋       | 795/3000 [05:47<15:49,  2.32it/s, v_num=1, train_loss_step=2.08e+6, train_loss_epoch=2.06e+6]Epoch 796/3000:  26%|██▋       | 795/3000 [05:47<15:49,  2.32it/s, v_num=1, train_loss_step=2.08e+6, train_loss_epoch=2.06e+6]Epoch 796/3000:  27%|██▋       | 796/3000 [05:47<15:05,  2.43it/s, v_num=1, train_loss_step=2.08e+6, train_loss_epoch=2.06e+6]Epoch 796/3000:  27%|██▋       | 796/3000 [05:47<15:05,  2.43it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=2.06e+6]Epoch 797/3000:  27%|██▋       | 796/3000 [05:47<15:05,  2.43it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=2.06e+6]Epoch 797/3000:  27%|██▋       | 797/3000 [05:48<15:23,  2.39it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=2.06e+6]Epoch 797/3000:  27%|██▋       | 797/3000 [05:48<15:23,  2.39it/s, v_num=1, train_loss_step=2.12e+6, train_loss_epoch=2.06e+6]Epoch 798/3000:  27%|██▋       | 797/3000 [05:48<15:23,  2.39it/s, v_num=1, train_loss_step=2.12e+6, train_loss_epoch=2.06e+6]Epoch 798/3000:  27%|██▋       | 798/3000 [05:48<15:43,  2.33it/s, v_num=1, train_loss_step=2.12e+6, train_loss_epoch=2.06e+6]Epoch 798/3000:  27%|██▋       | 798/3000 [05:48<15:43,  2.33it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2.06e+6]Epoch 799/3000:  27%|██▋       | 798/3000 [05:48<15:43,  2.33it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2.06e+6]Epoch 799/3000:  27%|██▋       | 
799/3000 [05:48<15:08,  2.42it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2.06e+6]Epoch 799/3000:  27%|██▋       | 799/3000 [05:48<15:08,  2.42it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.06e+6]Epoch 800/3000:  27%|██▋       | 799/3000 [05:48<15:08,  2.42it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.06e+6]Epoch 800/3000:  27%|██▋       | 800/3000 [05:49<15:04,  2.43it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.06e+6]Epoch 800/3000:  27%|██▋       | 800/3000 [05:49<15:04,  2.43it/s, v_num=1, train_loss_step=2.12e+6, train_loss_epoch=2.06e+6]Epoch 801/3000:  27%|██▋       | 800/3000 [05:49<15:04,  2.43it/s, v_num=1, train_loss_step=2.12e+6, train_loss_epoch=2.06e+6]Epoch 801/3000:  27%|██▋       | 801/3000 [05:49<16:09,  2.27it/s, v_num=1, train_loss_step=2.12e+6, train_loss_epoch=2.06e+6]Epoch 801/3000:  27%|██▋       | 801/3000 [05:49<16:09,  2.27it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=2.05e+6]   Epoch 802/3000:  27%|██▋       | 801/3000 [05:49<16:09,  2.27it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=2.05e+6]Epoch 802/3000:  27%|██▋       | 802/3000 [05:50<15:45,  2.33it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=2.05e+6]Epoch 802/3000:  27%|██▋       | 802/3000 [05:50<15:45,  2.33it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=2.05e+6]Epoch 803/3000:  27%|██▋       | 802/3000 [05:50<15:45,  2.33it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=2.05e+6]Epoch 803/3000:  27%|██▋       | 803/3000 [05:50<16:16,  2.25it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=2.05e+6]Epoch 803/3000:  27%|██▋       | 803/3000 [05:50<16:16,  2.25it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=2.05e+6]Epoch 804/3000:  27%|██▋       | 803/3000 [05:50<16:16,  2.25it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=2.05e+6]Epoch 804/3000:  27%|██▋       | 804/3000 [05:51<16:23,  2.23it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=2.05e+6]Epoch 804/3000:  
27%|██▋       | 804/3000 [05:51<16:23,  2.23it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.05e+6]Epoch 805/3000:  27%|██▋       | 804/3000 [05:51<16:23,  2.23it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.05e+6]Epoch 805/3000:  27%|██▋       | 805/3000 [05:51<15:12,  2.41it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.05e+6]Epoch 805/3000:  27%|██▋       | 805/3000 [05:51<15:12,  2.41it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=2.05e+6]Epoch 806/3000:  27%|██▋       | 805/3000 [05:51<15:12,  2.41it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=2.05e+6]Epoch 806/3000:  27%|██▋       | 806/3000 [05:51<15:32,  2.35it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=2.05e+6]Epoch 806/3000:  27%|██▋       | 806/3000 [05:51<15:32,  2.35it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.05e+6]Epoch 807/3000:  27%|██▋       | 806/3000 [05:51<15:32,  2.35it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.05e+6]Epoch 807/3000:  27%|██▋       | 807/3000 [05:52<15:46,  2.32it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.05e+6]Epoch 807/3000:  27%|██▋       | 807/3000 [05:52<15:46,  2.32it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=2.05e+6]Epoch 808/3000:  27%|██▋       | 807/3000 [05:52<15:46,  2.32it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=2.05e+6]Epoch 808/3000:  27%|██▋       | 808/3000 [05:52<15:30,  2.36it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=2.05e+6]Epoch 808/3000:  27%|██▋       | 808/3000 [05:52<15:30,  2.36it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=2.04e+6]Epoch 809/3000:  27%|██▋       | 808/3000 [05:52<15:30,  2.36it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=2.04e+6]Epoch 809/3000:  27%|██▋       | 809/3000 [05:53<15:24,  2.37it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=2.04e+6]Epoch 809/3000:  27%|██▋       | 809/3000 [05:53<15:24,  2.37it/s, v_num=1, train_loss_step=1.96e+6, 
train_loss_epoch=2.04e+6]Epoch 810/3000:  27%|██▋       | 809/3000 [05:53<15:24,  2.37it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=2.04e+6]Epoch 810/3000:  27%|██▋       | 810/3000 [05:53<15:55,  2.29it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=2.04e+6]Epoch 810/3000:  27%|██▋       | 810/3000 [05:53<15:55,  2.29it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=2.04e+6]Epoch 811/3000:  27%|██▋       | 810/3000 [05:53<15:55,  2.29it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=2.04e+6]Epoch 811/3000:  27%|██▋       | 811/3000 [05:54<15:55,  2.29it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=2.04e+6]Epoch 811/3000:  27%|██▋       | 811/3000 [05:54<15:55,  2.29it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.04e+6]Epoch 812/3000:  27%|██▋       | 811/3000 [05:54<15:55,  2.29it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.04e+6]Epoch 812/3000:  27%|██▋       | 812/3000 [05:54<15:46,  2.31it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.04e+6]Epoch 812/3000:  27%|██▋       | 812/3000 [05:54<15:46,  2.31it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=2.04e+6]Epoch 813/3000:  27%|██▋       | 812/3000 [05:54<15:46,  2.31it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=2.04e+6]Epoch 813/3000:  27%|██▋       | 813/3000 [05:54<15:48,  2.31it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=2.04e+6]Epoch 813/3000:  27%|██▋       | 813/3000 [05:54<15:48,  2.31it/s, v_num=1, train_loss_step=2.08e+6, train_loss_epoch=2.04e+6]Epoch 814/3000:  27%|██▋       | 813/3000 [05:54<15:48,  2.31it/s, v_num=1, train_loss_step=2.08e+6, train_loss_epoch=2.04e+6]Epoch 814/3000:  27%|██▋       | 814/3000 [05:55<15:34,  2.34it/s, v_num=1, train_loss_step=2.08e+6, train_loss_epoch=2.04e+6]Epoch 814/3000:  27%|██▋       | 814/3000 [05:55<15:34,  2.34it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=2.04e+6]Epoch 815/3000:  27%|██▋       | 814/3000 [05:55<15:34,  2.34it/s, v_num=1, 
train_loss_step=1.93e+6, train_loss_epoch=2.04e+6]Epoch 815/3000:  27%|██▋       | 815/3000 [05:55<14:58,  2.43it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=2.04e+6]Epoch 815/3000:  27%|██▋       | 815/3000 [05:55<14:58,  2.43it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=2.03e+6]Epoch 816/3000:  27%|██▋       | 815/3000 [05:55<14:58,  2.43it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=2.03e+6]Epoch 816/3000:  27%|██▋       | 816/3000 [05:56<15:51,  2.30it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=2.03e+6]Epoch 816/3000:  27%|██▋       | 816/3000 [05:56<15:51,  2.30it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.03e+6]Epoch 817/3000:  27%|██▋       | 816/3000 [05:56<15:51,  2.30it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.03e+6]Epoch 817/3000:  27%|██▋       | 817/3000 [05:56<16:14,  2.24it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.03e+6]Epoch 817/3000:  27%|██▋       | 817/3000 [05:56<16:14,  2.24it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.03e+6]Epoch 818/3000:  27%|██▋       | 817/3000 [05:56<16:14,  2.24it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.03e+6]Epoch 818/3000:  27%|██▋       | 818/3000 [05:57<16:07,  2.25it/s, v_num=1, train_loss_step=2.02e+6, train_loss_epoch=2.03e+6]Epoch 818/3000:  27%|██▋       | 818/3000 [05:57<16:07,  2.25it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=2.03e+6]Epoch 819/3000:  27%|██▋       | 818/3000 [05:57<16:07,  2.25it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=2.03e+6]Epoch 819/3000:  27%|██▋       | 819/3000 [05:57<15:34,  2.33it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=2.03e+6]Epoch 819/3000:  27%|██▋       | 819/3000 [05:57<15:34,  2.33it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=2.03e+6]Epoch 820/3000:  27%|██▋       | 819/3000 [05:57<15:34,  2.33it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=2.03e+6]Epoch 820/3000:  27%|██▋       | 820/3000 [05:57<15:35,  
2.33it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=2.03e+6]Epoch 820/3000:  27%|██▋       | 820/3000 [05:57<15:35,  2.33it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.03e+6]Epoch 821/3000:  27%|██▋       | 820/3000 [05:57<15:35,  2.33it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.03e+6]Epoch 821/3000:  27%|██▋       | 821/3000 [05:58<15:45,  2.31it/s, v_num=1, train_loss_step=2.13e+6, train_loss_epoch=2.03e+6]Epoch 821/3000:  27%|██▋       | 821/3000 [05:58<15:45,  2.31it/s, v_num=1, train_loss_step=1.95e+6, train_loss_epoch=2.03e+6]Epoch 822/3000:  27%|██▋       | 821/3000 [05:58<15:45,  2.31it/s, v_num=1, train_loss_step=1.95e+6, train_loss_epoch=2.03e+6]Epoch 822/3000:  27%|██▋       | 822/3000 [05:58<15:13,  2.38it/s, v_num=1, train_loss_step=1.95e+6, train_loss_epoch=2.03e+6]Epoch 822/3000:  27%|██▋       | 822/3000 [05:58<15:13,  2.38it/s, v_num=1, train_loss_step=2.08e+6, train_loss_epoch=2.03e+6]Epoch 823/3000:  27%|██▋       | 822/3000 [05:58<15:13,  2.38it/s, v_num=1, train_loss_step=2.08e+6, train_loss_epoch=2.03e+6]Epoch 823/3000:  27%|██▋       | 823/3000 [05:59<14:48,  2.45it/s, v_num=1, train_loss_step=2.08e+6, train_loss_epoch=2.03e+6]Epoch 823/3000:  27%|██▋       | 823/3000 [05:59<14:48,  2.45it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=2.02e+6]Epoch 824/3000:  27%|██▋       | 823/3000 [05:59<14:48,  2.45it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=2.02e+6]Epoch 824/3000:  27%|██▋       | 824/3000 [05:59<14:48,  2.45it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=2.02e+6]Epoch 824/3000:  27%|██▋       | 824/3000 [05:59<14:48,  2.45it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=2.02e+6] Epoch 825/3000:  27%|██▋       | 824/3000 [05:59<14:48,  2.45it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=2.02e+6]Epoch 825/3000:  28%|██▊       | 825/3000 [05:59<14:21,  2.52it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=2.02e+6]Epoch 825/3000:  28%|██▊       | 825/3000 
[05:59<14:21,  2.52it/s, v_num=1, train_loss_step=2.11e+6, train_loss_epoch=2.02e+6]Epoch 826/3000:  28%|██▊       | 825/3000 [05:59<14:21,  2.52it/s, v_num=1, train_loss_step=2.11e+6, train_loss_epoch=2.02e+6]Epoch 826/3000:  28%|██▊       | 826/3000 [06:00<14:33,  2.49it/s, v_num=1, train_loss_step=2.11e+6, train_loss_epoch=2.02e+6]Epoch 826/3000:  28%|██▊       | 826/3000 [06:00<14:33,  2.49it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=2.02e+6]Epoch 827/3000:  28%|██▊       | 826/3000 [06:00<14:33,  2.49it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=2.02e+6]Epoch 827/3000:  28%|██▊       | 827/3000 [06:00<14:24,  2.51it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=2.02e+6]Epoch 827/3000:  28%|██▊       | 827/3000 [06:00<14:24,  2.51it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=2.02e+6]Epoch 828/3000:  28%|██▊       | 827/3000 [06:00<14:24,  2.51it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=2.02e+6]Epoch 828/3000:  28%|██▊       | 828/3000 [06:01<14:02,  2.58it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=2.02e+6]Epoch 828/3000:  28%|██▊       | 828/3000 [06:01<14:02,  2.58it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=2.02e+6]Epoch 829/3000:  28%|██▊       | 828/3000 [06:01<14:02,  2.58it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=2.02e+6]Epoch 829/3000:  28%|██▊       | 829/3000 [06:01<14:40,  2.47it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=2.02e+6]Epoch 829/3000:  28%|██▊       | 829/3000 [06:01<14:40,  2.47it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2.02e+6]Epoch 830/3000:  28%|██▊       | 829/3000 [06:01<14:40,  2.47it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2.02e+6]Epoch 830/3000:  28%|██▊       | 830/3000 [06:01<13:46,  2.63it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2.02e+6]Epoch 830/3000:  28%|██▊       | 830/3000 [06:01<13:46,  2.63it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=2.01e+6]Epoch 831/3000:  28%|██▊  
     | 830/3000 [06:01<13:46,  2.63it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=2.01e+6]Epoch 831/3000:  28%|██▊       | 831/3000 [06:02<14:58,  2.41it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=2.01e+6]Epoch 831/3000:  28%|██▊       | 831/3000 [06:02<14:58,  2.41it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=2.01e+6]Epoch 832/3000:  28%|██▊       | 831/3000 [06:02<14:58,  2.41it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=2.01e+6]Epoch 832/3000:  28%|██▊       | 832/3000 [06:02<14:52,  2.43it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=2.01e+6]Epoch 832/3000:  28%|██▊       | 832/3000 [06:02<14:52,  2.43it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=2.01e+6] Epoch 833/3000:  28%|██▊       | 832/3000 [06:02<14:52,  2.43it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=2.01e+6]Epoch 833/3000:  28%|██▊       | 833/3000 [06:03<14:58,  2.41it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=2.01e+6]Epoch 833/3000:  28%|██▊       | 833/3000 [06:03<14:58,  2.41it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=2.01e+6]Epoch 834/3000:  28%|██▊       | 833/3000 [06:03<14:58,  2.41it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=2.01e+6]Epoch 834/3000:  28%|██▊       | 834/3000 [06:03<14:27,  2.50it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=2.01e+6]Epoch 834/3000:  28%|██▊       | 834/3000 [06:03<14:27,  2.50it/s, v_num=1, train_loss_step=2.06e+6, train_loss_epoch=2.01e+6]Epoch 835/3000:  28%|██▊       | 834/3000 [06:03<14:27,  2.50it/s, v_num=1, train_loss_step=2.06e+6, train_loss_epoch=2.01e+6]Epoch 835/3000:  28%|██▊       | 835/3000 [06:03<14:41,  2.46it/s, v_num=1, train_loss_step=2.06e+6, train_loss_epoch=2.01e+6]Epoch 835/3000:  28%|██▊       | 835/3000 [06:03<14:41,  2.46it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=2.01e+6]Epoch 836/3000:  28%|██▊       | 835/3000 [06:03<14:41,  2.46it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=2.01e+6]Epoch 
836/3000:  28%|██▊       | 836/3000 [06:04<15:00,  2.40it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=2.01e+6]Epoch 836/3000:  28%|██▊       | 836/3000 [06:04<15:00,  2.40it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=2.01e+6]Epoch 837/3000:  28%|██▊       | 836/3000 [06:04<15:00,  2.40it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=2.01e+6]Epoch 837/3000:  28%|██▊       | 837/3000 [06:04<15:12,  2.37it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=2.01e+6]Epoch 837/3000:  28%|██▊       | 837/3000 [06:04<15:12,  2.37it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=2.01e+6]   Epoch 838/3000:  28%|██▊       | 837/3000 [06:04<15:12,  2.37it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=2.01e+6]Epoch 838/3000:  28%|██▊       | 838/3000 [06:05<15:14,  2.36it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=2.01e+6]Epoch 838/3000:  28%|██▊       | 838/3000 [06:05<15:14,  2.36it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=2e+6]Epoch 839/3000:  28%|██▊       | 838/3000 [06:05<15:14,  2.36it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=2e+6]Epoch 839/3000:  28%|██▊       | 839/3000 [06:05<15:27,  2.33it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=2e+6]Epoch 839/3000:  28%|██▊       | 839/3000 [06:05<15:27,  2.33it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2e+6]Epoch 840/3000:  28%|██▊       | 839/3000 [06:05<15:27,  2.33it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2e+6]Epoch 840/3000:  28%|██▊       | 840/3000 [06:06<15:54,  2.26it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=2e+6]Epoch 840/3000:  28%|██▊       | 840/3000 [06:06<15:54,  2.26it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=2e+6]Epoch 841/3000:  28%|██▊       | 840/3000 [06:06<15:54,  2.26it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=2e+6]Epoch 841/3000:  28%|██▊       | 841/3000 [06:06<16:33,  2.17it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=2e+6]Epoch 841/3000:  
28%|██▊       | 841/3000 [06:06<16:33,  2.17it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=2e+6]Epoch 842/3000:  28%|██▊       | 841/3000 [06:06<16:33,  2.17it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=2e+6]Epoch 842/3000:  28%|██▊       | 842/3000 [06:07<16:28,  2.18it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=2e+6]Epoch 842/3000:  28%|██▊       | 842/3000 [06:07<16:28,  2.18it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=2e+6]Epoch 843/3000:  28%|██▊       | 842/3000 [06:07<16:28,  2.18it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=2e+6]Epoch 843/3000:  28%|██▊       | 843/3000 [06:07<16:07,  2.23it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=2e+6]Epoch 843/3000:  28%|██▊       | 843/3000 [06:07<16:07,  2.23it/s, v_num=1, train_loss_step=2.12e+6, train_loss_epoch=2e+6]Epoch 844/3000:  28%|██▊       | 843/3000 [06:07<16:07,  2.23it/s, v_num=1, train_loss_step=2.12e+6, train_loss_epoch=2e+6]Epoch 844/3000:  28%|██▊       | 844/3000 [06:07<15:15,  2.35it/s, v_num=1, train_loss_step=2.12e+6, train_loss_epoch=2e+6]Epoch 844/3000:  28%|██▊       | 844/3000 [06:07<15:15,  2.35it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=2e+6]Epoch 845/3000:  28%|██▊       | 844/3000 [06:07<15:15,  2.35it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=2e+6]Epoch 845/3000:  28%|██▊       | 845/3000 [06:08<15:29,  2.32it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=2e+6]Epoch 845/3000:  28%|██▊       | 845/3000 [06:08<15:29,  2.32it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=2e+6]Epoch 846/3000:  28%|██▊       | 845/3000 [06:08<15:29,  2.32it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=2e+6]Epoch 846/3000:  28%|██▊       | 846/3000 [06:08<15:31,  2.31it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=2e+6]Epoch 846/3000:  28%|██▊       | 846/3000 [06:08<15:31,  2.31it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=1.99e+6]Epoch 847/3000:  28%|██▊       | 846/3000 
[06:08<15:31,  2.31it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=1.99e+6]Epoch 847/3000:  28%|██▊       | 847/3000 [06:09<15:37,  2.30it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=1.99e+6]Epoch 847/3000:  28%|██▊       | 847/3000 [06:09<15:37,  2.30it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.99e+6]Epoch 848/3000:  28%|██▊       | 847/3000 [06:09<15:37,  2.30it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.99e+6]Epoch 848/3000:  28%|██▊       | 848/3000 [06:09<16:26,  2.18it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.99e+6]Epoch 848/3000:  28%|██▊       | 848/3000 [06:09<16:26,  2.18it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=1.99e+6]   Epoch 849/3000:  28%|██▊       | 848/3000 [06:09<16:26,  2.18it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=1.99e+6]Epoch 849/3000:  28%|██▊       | 849/3000 [06:10<17:37,  2.03it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=1.99e+6]Epoch 849/3000:  28%|██▊       | 849/3000 [06:10<17:37,  2.03it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.99e+6]Epoch 850/3000:  28%|██▊       | 849/3000 [06:10<17:37,  2.03it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.99e+6]Epoch 850/3000:  28%|██▊       | 850/3000 [06:10<17:29,  2.05it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.99e+6]Epoch 850/3000:  28%|██▊       | 850/3000 [06:10<17:29,  2.05it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.99e+6]Epoch 851/3000:  28%|██▊       | 850/3000 [06:10<17:29,  2.05it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.99e+6]Epoch 851/3000:  28%|██▊       | 851/3000 [06:11<16:56,  2.11it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.99e+6]Epoch 851/3000:  28%|██▊       | 851/3000 [06:11<16:56,  2.11it/s, v_num=1, train_loss_step=2.06e+6, train_loss_epoch=1.99e+6]Epoch 852/3000:  28%|██▊       | 851/3000 [06:11<16:56,  2.11it/s, v_num=1, train_loss_step=2.06e+6, train_loss_epoch=1.99e+6]Epoch 852/3000:  28%|██▊       
| 852/3000 [06:11<16:38,  2.15it/s, v_num=1, train_loss_step=2.06e+6, train_loss_epoch=1.99e+6]Epoch 852/3000:  28%|██▊       | 852/3000 [06:11<16:38,  2.15it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.99e+6]Epoch 853/3000:  28%|██▊       | 852/3000 [06:11<16:38,  2.15it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.99e+6]Epoch 853/3000:  28%|██▊       | 853/3000 [06:12<16:28,  2.17it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.99e+6]Epoch 853/3000:  28%|██▊       | 853/3000 [06:12<16:28,  2.17it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.99e+6]Epoch 854/3000:  28%|██▊       | 853/3000 [06:12<16:28,  2.17it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.99e+6]Epoch 854/3000:  28%|██▊       | 854/3000 [06:12<15:59,  2.24it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.99e+6]Epoch 854/3000:  28%|██▊       | 854/3000 [06:12<15:59,  2.24it/s, v_num=1, train_loss_step=2.01e+6, train_loss_epoch=1.98e+6]Epoch 855/3000:  28%|██▊       | 854/3000 [06:12<15:59,  2.24it/s, v_num=1, train_loss_step=2.01e+6, train_loss_epoch=1.98e+6]Epoch 855/3000:  28%|██▊       | 855/3000 [06:13<15:49,  2.26it/s, v_num=1, train_loss_step=2.01e+6, train_loss_epoch=1.98e+6]Epoch 855/3000:  28%|██▊       | 855/3000 [06:13<15:49,  2.26it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=1.98e+6]Epoch 856/3000:  28%|██▊       | 855/3000 [06:13<15:49,  2.26it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=1.98e+6]Epoch 856/3000:  29%|██▊       | 856/3000 [06:13<15:17,  2.34it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=1.98e+6]Epoch 856/3000:  29%|██▊       | 856/3000 [06:13<15:17,  2.34it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=1.98e+6]Epoch 857/3000:  29%|██▊       | 856/3000 [06:13<15:17,  2.34it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=1.98e+6]Epoch 857/3000:  29%|██▊       | 857/3000 [06:13<15:34,  2.29it/s, v_num=1, train_loss_step=2.09e+6, train_loss_epoch=1.98e+6]Epoch 
857/3000:  29%|██▊       | 857/3000 [06:13<15:34,  2.29it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=1.98e+6]Epoch 858/3000:  29%|██▊       | 857/3000 [06:13<15:34,  2.29it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=1.98e+6]Epoch 858/3000:  29%|██▊       | 858/3000 [06:14<15:25,  2.32it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=1.98e+6]Epoch 858/3000:  29%|██▊       | 858/3000 [06:14<15:25,  2.32it/s, v_num=1, train_loss_step=2.06e+6, train_loss_epoch=1.98e+6]Epoch 859/3000:  29%|██▊       | 858/3000 [06:14<15:25,  2.32it/s, v_num=1, train_loss_step=2.06e+6, train_loss_epoch=1.98e+6]Epoch 859/3000:  29%|██▊       | 859/3000 [06:14<15:47,  2.26it/s, v_num=1, train_loss_step=2.06e+6, train_loss_epoch=1.98e+6]Epoch 859/3000:  29%|██▊       | 859/3000 [06:14<15:47,  2.26it/s, v_num=1, train_loss_step=2.1e+6, train_loss_epoch=1.98e+6] Epoch 860/3000:  29%|██▊       | 859/3000 [06:14<15:47,  2.26it/s, v_num=1, train_loss_step=2.1e+6, train_loss_epoch=1.98e+6]Epoch 860/3000:  29%|██▊       | 860/3000 [06:15<15:41,  2.27it/s, v_num=1, train_loss_step=2.1e+6, train_loss_epoch=1.98e+6]Epoch 860/3000:  29%|██▊       | 860/3000 [06:15<15:41,  2.27it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=1.98e+6]Epoch 861/3000:  29%|██▊       | 860/3000 [06:15<15:41,  2.27it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=1.98e+6]Epoch 861/3000:  29%|██▊       | 861/3000 [06:15<15:36,  2.28it/s, v_num=1, train_loss_step=2.07e+6, train_loss_epoch=1.98e+6]Epoch 861/3000:  29%|██▊       | 861/3000 [06:15<15:36,  2.28it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=1.98e+6]Epoch 862/3000:  29%|██▊       | 861/3000 [06:15<15:36,  2.28it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=1.98e+6]Epoch 862/3000:  29%|██▊       | 862/3000 [06:16<16:23,  2.17it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=1.98e+6]Epoch 862/3000:  29%|██▊       | 862/3000 [06:16<16:23,  2.17it/s, v_num=1, train_loss_step=1.99e+6, 
train_loss_epoch=1.98e+6]Epoch 863/3000:  29%|██▊       | 862/3000 [06:16<16:23,  2.17it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.98e+6]Epoch 863/3000:  29%|██▉       | 863/3000 [06:16<16:20,  2.18it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.98e+6]Epoch 863/3000:  29%|██▉       | 863/3000 [06:16<16:20,  2.18it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.97e+6]Epoch 864/3000:  29%|██▉       | 863/3000 [06:16<16:20,  2.18it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.97e+6]Epoch 864/3000:  29%|██▉       | 864/3000 [06:16<15:43,  2.26it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.97e+6]Epoch 864/3000:  29%|██▉       | 864/3000 [06:16<15:43,  2.26it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=1.97e+6]   Epoch 865/3000:  29%|██▉       | 864/3000 [06:16<15:43,  2.26it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=1.97e+6]Epoch 865/3000:  29%|██▉       | 865/3000 [06:17<15:35,  2.28it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=1.97e+6]Epoch 865/3000:  29%|██▉       | 865/3000 [06:17<15:35,  2.28it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.97e+6]Epoch 866/3000:  29%|██▉       | 865/3000 [06:17<15:35,  2.28it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.97e+6]Epoch 866/3000:  29%|██▉       | 866/3000 [06:17<14:14,  2.50it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.97e+6]Epoch 866/3000:  29%|██▉       | 866/3000 [06:17<14:14,  2.50it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.97e+6]Epoch 867/3000:  29%|██▉       | 866/3000 [06:17<14:14,  2.50it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.97e+6]Epoch 867/3000:  29%|██▉       | 867/3000 [06:18<14:00,  2.54it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.97e+6]Epoch 867/3000:  29%|██▉       | 867/3000 [06:18<14:00,  2.54it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.97e+6]Epoch 868/3000:  29%|██▉       | 867/3000 [06:18<14:00,  2.54it/s, v_num=1, 
train_loss_step=1.92e+6, train_loss_epoch=1.97e+6]Epoch 868/3000:  29%|██▉       | 868/3000 [06:18<14:38,  2.43it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.97e+6]Epoch 868/3000:  29%|██▉       | 868/3000 [06:18<14:38,  2.43it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=1.97e+6]Epoch 869/3000:  29%|██▉       | 868/3000 [06:18<14:38,  2.43it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=1.97e+6]Epoch 869/3000:  29%|██▉       | 869/3000 [06:18<14:41,  2.42it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=1.97e+6]Epoch 869/3000:  29%|██▉       | 869/3000 [06:18<14:41,  2.42it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=1.97e+6]Epoch 870/3000:  29%|██▉       | 869/3000 [06:18<14:41,  2.42it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=1.97e+6]Epoch 870/3000:  29%|██▉       | 870/3000 [06:19<15:49,  2.24it/s, v_num=1, train_loss_step=2.04e+6, train_loss_epoch=1.97e+6]Epoch 870/3000:  29%|██▉       | 870/3000 [06:19<15:49,  2.24it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.97e+6]Epoch 871/3000:  29%|██▉       | 870/3000 [06:19<15:49,  2.24it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.97e+6]Epoch 871/3000:  29%|██▉       | 871/3000 [06:20<16:27,  2.16it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.97e+6]Epoch 871/3000:  29%|██▉       | 871/3000 [06:20<16:27,  2.16it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=1.96e+6]Epoch 872/3000:  29%|██▉       | 871/3000 [06:20<16:27,  2.16it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=1.96e+6]Epoch 872/3000:  29%|██▉       | 872/3000 [06:20<16:14,  2.18it/s, v_num=1, train_loss_step=2.05e+6, train_loss_epoch=1.96e+6]Epoch 872/3000:  29%|██▉       | 872/3000 [06:20<16:14,  2.18it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.96e+6]Epoch 873/3000:  29%|██▉       | 872/3000 [06:20<16:14,  2.18it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.96e+6]Epoch 873/3000:  29%|██▉       | 873/3000 [06:20<15:17,  
2.32it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.96e+6]Epoch 873/3000:  29%|██▉       | 873/3000 [06:20<15:17,  2.32it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.96e+6]Epoch 874/3000:  29%|██▉       | 873/3000 [06:20<15:17,  2.32it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.96e+6]Epoch 874/3000:  29%|██▉       | 874/3000 [06:21<16:20,  2.17it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.96e+6]Epoch 874/3000:  29%|██▉       | 874/3000 [06:21<16:20,  2.17it/s, v_num=1, train_loss_step=2.01e+6, train_loss_epoch=1.96e+6]Epoch 875/3000:  29%|██▉       | 874/3000 [06:21<16:20,  2.17it/s, v_num=1, train_loss_step=2.01e+6, train_loss_epoch=1.96e+6]Epoch 875/3000:  29%|██▉       | 875/3000 [06:21<16:42,  2.12it/s, v_num=1, train_loss_step=2.01e+6, train_loss_epoch=1.96e+6]Epoch 875/3000:  29%|██▉       | 875/3000 [06:21<16:42,  2.12it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.96e+6]Epoch 876/3000:  29%|██▉       | 875/3000 [06:21<16:42,  2.12it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.96e+6]Epoch 876/3000:  29%|██▉       | 876/3000 [06:22<16:25,  2.15it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.96e+6]Epoch 876/3000:  29%|██▉       | 876/3000 [06:22<16:25,  2.15it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.96e+6]Epoch 877/3000:  29%|██▉       | 876/3000 [06:22<16:25,  2.15it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.96e+6]Epoch 877/3000:  29%|██▉       | 877/3000 [06:22<15:09,  2.33it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.96e+6]Epoch 877/3000:  29%|██▉       | 877/3000 [06:22<15:09,  2.33it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.96e+6]Epoch 878/3000:  29%|██▉       | 877/3000 [06:22<15:09,  2.33it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.96e+6]Epoch 878/3000:  29%|██▉       | 878/3000 [06:23<15:22,  2.30it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.96e+6]Epoch 878/3000:  29%|██▉       | 
878/3000 [06:23<15:22,  2.30it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.96e+6] Epoch 879/3000:  29%|██▉       | 878/3000 [06:23<15:22,  2.30it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.96e+6]Epoch 879/3000:  29%|██▉       | 879/3000 [06:23<15:46,  2.24it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.96e+6]Epoch 879/3000:  29%|██▉       | 879/3000 [06:23<15:46,  2.24it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.96e+6]Epoch 880/3000:  29%|██▉       | 879/3000 [06:23<15:46,  2.24it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.96e+6]Epoch 880/3000:  29%|██▉       | 880/3000 [06:24<15:53,  2.22it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.96e+6]Epoch 880/3000:  29%|██▉       | 880/3000 [06:24<15:53,  2.22it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=1.95e+6]Epoch 881/3000:  29%|██▉       | 880/3000 [06:24<15:53,  2.22it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=1.95e+6]Epoch 881/3000:  29%|██▉       | 881/3000 [06:24<15:24,  2.29it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=1.95e+6]Epoch 881/3000:  29%|██▉       | 881/3000 [06:24<15:24,  2.29it/s, v_num=1, train_loss_step=1.95e+6, train_loss_epoch=1.95e+6]Epoch 882/3000:  29%|██▉       | 881/3000 [06:24<15:24,  2.29it/s, v_num=1, train_loss_step=1.95e+6, train_loss_epoch=1.95e+6]Epoch 882/3000:  29%|██▉       | 882/3000 [06:24<15:57,  2.21it/s, v_num=1, train_loss_step=1.95e+6, train_loss_epoch=1.95e+6]Epoch 882/3000:  29%|██▉       | 882/3000 [06:24<15:57,  2.21it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.95e+6]Epoch 883/3000:  29%|██▉       | 882/3000 [06:24<15:57,  2.21it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.95e+6]Epoch 883/3000:  29%|██▉       | 883/3000 [06:25<15:39,  2.25it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.95e+6]Epoch 883/3000:  29%|██▉       | 883/3000 [06:25<15:39,  2.25it/s, v_num=1, train_loss_step=1.95e+6, train_loss_epoch=1.95e+6]Epoch 884/3000:  
29%|██▉       | 883/3000 [06:25<15:39,  2.25it/s, v_num=1, train_loss_step=1.95e+6, train_loss_epoch=1.95e+6]Epoch 884/3000:  29%|██▉       | 884/3000 [06:25<15:29,  2.28it/s, v_num=1, train_loss_step=1.95e+6, train_loss_epoch=1.95e+6]Epoch 884/3000:  29%|██▉       | 884/3000 [06:25<15:29,  2.28it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.95e+6]Epoch 885/3000:  29%|██▉       | 884/3000 [06:25<15:29,  2.28it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.95e+6]Epoch 885/3000:  30%|██▉       | 885/3000 [06:26<15:19,  2.30it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.95e+6]Epoch 885/3000:  30%|██▉       | 885/3000 [06:26<15:19,  2.30it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.95e+6] Epoch 886/3000:  30%|██▉       | 885/3000 [06:26<15:19,  2.30it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.95e+6]Epoch 886/3000:  30%|██▉       | 886/3000 [06:26<15:40,  2.25it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.95e+6]Epoch 886/3000:  30%|██▉       | 886/3000 [06:26<15:40,  2.25it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.95e+6]Epoch 887/3000:  30%|██▉       | 886/3000 [06:26<15:40,  2.25it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.95e+6]Epoch 887/3000:  30%|██▉       | 887/3000 [06:27<15:18,  2.30it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.95e+6]Epoch 887/3000:  30%|██▉       | 887/3000 [06:27<15:18,  2.30it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.95e+6]Epoch 888/3000:  30%|██▉       | 887/3000 [06:27<15:18,  2.30it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.95e+6]Epoch 888/3000:  30%|██▉       | 888/3000 [06:27<15:14,  2.31it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.95e+6]Epoch 888/3000:  30%|██▉       | 888/3000 [06:27<15:14,  2.31it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=1.95e+6]Epoch 889/3000:  30%|██▉       | 888/3000 [06:27<15:14,  2.31it/s, v_num=1, train_loss_step=1.96e+6, 
train_loss_epoch=1.95e+6]Epoch 889/3000:  30%|██▉       | 889/3000 [06:27<15:14,  2.31it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=1.95e+6]Epoch 889/3000:  30%|██▉       | 889/3000 [06:27<15:14,  2.31it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.94e+6]Epoch 890/3000:  30%|██▉       | 889/3000 [06:27<15:14,  2.31it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.94e+6]Epoch 890/3000:  30%|██▉       | 890/3000 [06:28<15:53,  2.21it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.94e+6]Epoch 890/3000:  30%|██▉       | 890/3000 [06:28<15:53,  2.21it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.94e+6]Epoch 891/3000:  30%|██▉       | 890/3000 [06:28<15:53,  2.21it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.94e+6]Epoch 891/3000:  30%|██▉       | 891/3000 [06:28<15:37,  2.25it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.94e+6]Epoch 891/3000:  30%|██▉       | 891/3000 [06:28<15:37,  2.25it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.94e+6]Epoch 892/3000:  30%|██▉       | 891/3000 [06:28<15:37,  2.25it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.94e+6]Epoch 892/3000:  30%|██▉       | 892/3000 [06:29<15:21,  2.29it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.94e+6]Epoch 892/3000:  30%|██▉       | 892/3000 [06:29<15:21,  2.29it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=1.94e+6]Epoch 893/3000:  30%|██▉       | 892/3000 [06:29<15:21,  2.29it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=1.94e+6]Epoch 893/3000:  30%|██▉       | 893/3000 [06:29<15:54,  2.21it/s, v_num=1, train_loss_step=2.03e+6, train_loss_epoch=1.94e+6]Epoch 893/3000:  30%|██▉       | 893/3000 [06:29<15:54,  2.21it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.94e+6]Epoch 894/3000:  30%|██▉       | 893/3000 [06:29<15:54,  2.21it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.94e+6]Epoch 894/3000:  30%|██▉       | 894/3000 [06:30<15:47,  2.22it/s, v_num=1, 
train_loss_step=1.94e+6, train_loss_epoch=1.94e+6]Epoch 894/3000:  30%|██▉       | 894/3000 [06:30<15:47,  2.22it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.94e+6]Epoch 895/3000:  30%|██▉       | 894/3000 [06:30<15:47,  2.22it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.94e+6]Epoch 895/3000:  30%|██▉       | 895/3000 [06:30<15:29,  2.26it/s, v_num=1, train_loss_step=1.99e+6, train_loss_epoch=1.94e+6]Epoch 895/3000:  30%|██▉       | 895/3000 [06:30<15:29,  2.26it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.94e+6]Epoch 896/3000:  30%|██▉       | 895/3000 [06:30<15:29,  2.26it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.94e+6]Epoch 896/3000:  30%|██▉       | 896/3000 [06:31<15:23,  2.28it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.94e+6]Epoch 896/3000:  30%|██▉       | 896/3000 [06:31<15:23,  2.28it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.94e+6]Epoch 897/3000:  30%|██▉       | 896/3000 [06:31<15:23,  2.28it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.94e+6]Epoch 897/3000:  30%|██▉       | 897/3000 [06:31<15:09,  2.31it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.94e+6]Epoch 897/3000:  30%|██▉       | 897/3000 [06:31<15:09,  2.31it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.94e+6]Epoch 898/3000:  30%|██▉       | 897/3000 [06:31<15:09,  2.31it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.94e+6]Epoch 898/3000:  30%|██▉       | 898/3000 [06:32<16:06,  2.18it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.94e+6]Epoch 898/3000:  30%|██▉       | 898/3000 [06:32<16:06,  2.18it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.93e+6]Epoch 899/3000:  30%|██▉       | 898/3000 [06:32<16:06,  2.18it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.93e+6]Epoch 899/3000:  30%|██▉       | 899/3000 [06:32<15:08,  2.31it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.93e+6]Epoch 899/3000:  30%|██▉       | 899/3000 [06:32<15:08,  
2.31it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.93e+6]Epoch 900/3000:  30%|██▉       | 899/3000 [06:32<15:08,  2.31it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.93e+6]Epoch 900/3000:  30%|███       | 900/3000 [06:32<15:04,  2.32it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.93e+6]Epoch 900/3000:  30%|███       | 900/3000 [06:32<15:04,  2.32it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.93e+6]Epoch 901/3000:  30%|███       | 900/3000 [06:32<15:04,  2.32it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.93e+6]Epoch 901/3000:  30%|███       | 901/3000 [06:33<14:39,  2.39it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.93e+6]Epoch 901/3000:  30%|███       | 901/3000 [06:33<14:39,  2.39it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.93e+6]Epoch 902/3000:  30%|███       | 901/3000 [06:33<14:39,  2.39it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.93e+6]Epoch 902/3000:  30%|███       | 902/3000 [06:33<14:20,  2.44it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.93e+6]Epoch 902/3000:  30%|███       | 902/3000 [06:33<14:20,  2.44it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.93e+6]Epoch 903/3000:  30%|███       | 902/3000 [06:33<14:20,  2.44it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.93e+6]Epoch 903/3000:  30%|███       | 903/3000 [06:34<14:29,  2.41it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.93e+6]Epoch 903/3000:  30%|███       | 903/3000 [06:34<14:29,  2.41it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.93e+6]Epoch 904/3000:  30%|███       | 903/3000 [06:34<14:29,  2.41it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.93e+6]Epoch 904/3000:  30%|███       | 904/3000 [06:34<15:15,  2.29it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.93e+6]Epoch 904/3000:  30%|███       | 904/3000 [06:34<15:15,  2.29it/s, v_num=1, train_loss_step=2.01e+6, train_loss_epoch=1.93e+6]Epoch 905/3000:  30%|███       | 
904/3000 [06:34<15:15,  2.29it/s, v_num=1, train_loss_step=2.01e+6, train_loss_epoch=1.93e+6]Epoch 905/3000:  30%|███       | 905/3000 [06:34<14:59,  2.33it/s, v_num=1, train_loss_step=2.01e+6, train_loss_epoch=1.93e+6]Epoch 905/3000:  30%|███       | 905/3000 [06:34<14:59,  2.33it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.93e+6]Epoch 906/3000:  30%|███       | 905/3000 [06:34<14:59,  2.33it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.93e+6]Epoch 906/3000:  30%|███       | 906/3000 [06:35<14:12,  2.46it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.93e+6]Epoch 906/3000:  30%|███       | 906/3000 [06:35<14:12,  2.46it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.93e+6]Epoch 907/3000:  30%|███       | 906/3000 [06:35<14:12,  2.46it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.93e+6]Epoch 907/3000:  30%|███       | 907/3000 [06:35<14:29,  2.41it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.93e+6]Epoch 907/3000:  30%|███       | 907/3000 [06:35<14:29,  2.41it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.93e+6] Epoch 908/3000:  30%|███       | 907/3000 [06:35<14:29,  2.41it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.93e+6]Epoch 908/3000:  30%|███       | 908/3000 [06:36<14:40,  2.38it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.93e+6]Epoch 908/3000:  30%|███       | 908/3000 [06:36<14:40,  2.38it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.92e+6]Epoch 909/3000:  30%|███       | 908/3000 [06:36<14:40,  2.38it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.92e+6]Epoch 909/3000:  30%|███       | 909/3000 [06:36<14:14,  2.45it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.92e+6]Epoch 909/3000:  30%|███       | 909/3000 [06:36<14:14,  2.45it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.92e+6]Epoch 910/3000:  30%|███       | 909/3000 [06:36<14:14,  2.45it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.92e+6]Epoch 910/3000:  
30%|███       | 910/3000 [06:36<14:55,  2.33it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.92e+6]Epoch 910/3000:  30%|███       | 910/3000 [06:36<14:55,  2.33it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=1.92e+6]Epoch 911/3000:  30%|███       | 910/3000 [06:36<14:55,  2.33it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=1.92e+6]Epoch 911/3000:  30%|███       | 911/3000 [06:37<15:06,  2.30it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=1.92e+6]Epoch 911/3000:  30%|███       | 911/3000 [06:37<15:06,  2.30it/s, v_num=1, train_loss_step=1.95e+6, train_loss_epoch=1.92e+6]Epoch 912/3000:  30%|███       | 911/3000 [06:37<15:06,  2.30it/s, v_num=1, train_loss_step=1.95e+6, train_loss_epoch=1.92e+6]Epoch 912/3000:  30%|███       | 912/3000 [06:37<15:23,  2.26it/s, v_num=1, train_loss_step=1.95e+6, train_loss_epoch=1.92e+6]Epoch 912/3000:  30%|███       | 912/3000 [06:37<15:23,  2.26it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.92e+6]Epoch 913/3000:  30%|███       | 912/3000 [06:37<15:23,  2.26it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.92e+6]Epoch 913/3000:  30%|███       | 913/3000 [06:38<14:37,  2.38it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.92e+6]Epoch 913/3000:  30%|███       | 913/3000 [06:38<14:37,  2.38it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.92e+6]Epoch 914/3000:  30%|███       | 913/3000 [06:38<14:37,  2.38it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.92e+6]Epoch 914/3000:  30%|███       | 914/3000 [06:38<14:18,  2.43it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.92e+6]Epoch 914/3000:  30%|███       | 914/3000 [06:38<14:18,  2.43it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.92e+6]Epoch 915/3000:  30%|███       | 914/3000 [06:38<14:18,  2.43it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.92e+6]Epoch 915/3000:  30%|███       | 915/3000 [06:39<14:55,  2.33it/s, v_num=1, train_loss_step=1.91e+6, 
train_loss_epoch=1.92e+6]Epoch 915/3000:  30%|███       | 915/3000 [06:39<14:55,  2.33it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.92e+6]Epoch 916/3000:  30%|███       | 915/3000 [06:39<14:55,  2.33it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.92e+6]Epoch 916/3000:  31%|███       | 916/3000 [06:39<15:32,  2.23it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.92e+6]Epoch 916/3000:  31%|███       | 916/3000 [06:39<15:32,  2.23it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=1.92e+6]Epoch 917/3000:  31%|███       | 916/3000 [06:39<15:32,  2.23it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=1.92e+6]Epoch 917/3000:  31%|███       | 917/3000 [06:40<15:05,  2.30it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=1.92e+6]Epoch 917/3000:  31%|███       | 917/3000 [06:40<15:05,  2.30it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.91e+6]Epoch 918/3000:  31%|███       | 917/3000 [06:40<15:05,  2.30it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.91e+6]Epoch 918/3000:  31%|███       | 918/3000 [06:40<15:36,  2.22it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.91e+6]Epoch 918/3000:  31%|███       | 918/3000 [06:40<15:36,  2.22it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.91e+6]Epoch 919/3000:  31%|███       | 918/3000 [06:40<15:36,  2.22it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.91e+6]Epoch 919/3000:  31%|███       | 919/3000 [06:40<14:41,  2.36it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.91e+6]Epoch 919/3000:  31%|███       | 919/3000 [06:40<14:41,  2.36it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=1.91e+6]Epoch 920/3000:  31%|███       | 919/3000 [06:40<14:41,  2.36it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=1.91e+6]Epoch 920/3000:  31%|███       | 920/3000 [06:41<14:04,  2.46it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=1.91e+6]Epoch 920/3000:  31%|███       | 920/3000 [06:41<14:04,  2.46it/s, v_num=1, 
train_loss_step=1.95e+6, train_loss_epoch=1.91e+6]Epoch 921/3000:  31%|███       | 920/3000 [06:41<14:04,  2.46it/s, v_num=1, train_loss_step=1.95e+6, train_loss_epoch=1.91e+6]Epoch 921/3000:  31%|███       | 921/3000 [06:41<14:04,  2.46it/s, v_num=1, train_loss_step=1.95e+6, train_loss_epoch=1.91e+6]Epoch 921/3000:  31%|███       | 921/3000 [06:41<14:04,  2.46it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.91e+6]Epoch 922/3000:  31%|███       | 921/3000 [06:41<14:04,  2.46it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.91e+6]Epoch 922/3000:  31%|███       | 922/3000 [06:42<14:41,  2.36it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.91e+6]Epoch 922/3000:  31%|███       | 922/3000 [06:42<14:41,  2.36it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=1.91e+6]Epoch 923/3000:  31%|███       | 922/3000 [06:42<14:41,  2.36it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=1.91e+6]Epoch 923/3000:  31%|███       | 923/3000 [06:42<14:32,  2.38it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=1.91e+6]Epoch 923/3000:  31%|███       | 923/3000 [06:42<14:32,  2.38it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=1.91e+6]   Epoch 924/3000:  31%|███       | 923/3000 [06:42<14:32,  2.38it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=1.91e+6]Epoch 924/3000:  31%|███       | 924/3000 [06:42<15:02,  2.30it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=1.91e+6]Epoch 924/3000:  31%|███       | 924/3000 [06:42<15:02,  2.30it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.91e+6]Epoch 925/3000:  31%|███       | 924/3000 [06:42<15:02,  2.30it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.91e+6]Epoch 925/3000:  31%|███       | 925/3000 [06:43<14:55,  2.32it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.91e+6]Epoch 925/3000:  31%|███       | 925/3000 [06:43<14:55,  2.32it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.91e+6]Epoch 926/3000:  31%|███       | 925/3000 [06:43<14:55,  
2.32it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.91e+6]Epoch 926/3000:  31%|███       | 926/3000 [06:43<15:00,  2.30it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.91e+6]Epoch 926/3000:  31%|███       | 926/3000 [06:43<15:00,  2.30it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.91e+6]Epoch 927/3000:  31%|███       | 926/3000 [06:43<15:00,  2.30it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.91e+6]Epoch 927/3000:  31%|███       | 927/3000 [06:44<14:23,  2.40it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.91e+6]Epoch 927/3000:  31%|███       | 927/3000 [06:44<14:23,  2.40it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.9e+6] Epoch 928/3000:  31%|███       | 927/3000 [06:44<14:23,  2.40it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.9e+6]Epoch 928/3000:  31%|███       | 928/3000 [06:44<15:20,  2.25it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.9e+6]Epoch 928/3000:  31%|███       | 928/3000 [06:44<15:20,  2.25it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.9e+6]Epoch 929/3000:  31%|███       | 928/3000 [06:44<15:20,  2.25it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.9e+6]Epoch 929/3000:  31%|███       | 929/3000 [06:45<15:00,  2.30it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.9e+6]Epoch 929/3000:  31%|███       | 929/3000 [06:45<15:00,  2.30it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.9e+6]Epoch 930/3000:  31%|███       | 929/3000 [06:45<15:00,  2.30it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.9e+6]Epoch 930/3000:  31%|███       | 930/3000 [06:45<15:30,  2.22it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.9e+6]Epoch 930/3000:  31%|███       | 930/3000 [06:45<15:30,  2.22it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.9e+6]Epoch 931/3000:  31%|███       | 930/3000 [06:45<15:30,  2.22it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.9e+6]Epoch 931/3000:  31%|███       | 931/3000 
[06:46<14:49,  2.33it/s, v_num=1, train_loss_step=1.97e+6, train_loss_epoch=1.9e+6]Epoch 931/3000:  31%|███       | 931/3000 [06:46<14:49,  2.33it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.9e+6]Epoch 932/3000:  31%|███       | 931/3000 [06:46<14:49,  2.33it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.9e+6]Epoch 932/3000:  31%|███       | 932/3000 [06:46<16:06,  2.14it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.9e+6]Epoch 932/3000:  31%|███       | 932/3000 [06:46<16:06,  2.14it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.9e+6]Epoch 933/3000:  31%|███       | 932/3000 [06:46<16:06,  2.14it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.9e+6]Epoch 933/3000:  31%|███       | 933/3000 [06:46<14:53,  2.31it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.9e+6]Epoch 933/3000:  31%|███       | 933/3000 [06:46<14:53,  2.31it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.9e+6]Epoch 934/3000:  31%|███       | 933/3000 [06:46<14:53,  2.31it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.9e+6]Epoch 934/3000:  31%|███       | 934/3000 [06:47<15:51,  2.17it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.9e+6]Epoch 934/3000:  31%|███       | 934/3000 [06:47<15:51,  2.17it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=1.9e+6]Epoch 935/3000:  31%|███       | 934/3000 [06:47<15:51,  2.17it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=1.9e+6]Epoch 935/3000:  31%|███       | 935/3000 [06:47<16:07,  2.13it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=1.9e+6]Epoch 935/3000:  31%|███       | 935/3000 [06:47<16:07,  2.13it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.9e+6]Epoch 936/3000:  31%|███       | 935/3000 [06:47<16:07,  2.13it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.9e+6]Epoch 936/3000:  31%|███       | 936/3000 [06:48<15:47,  2.18it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.9e+6]Epoch 936/3000:  31%|███       | 936/3000 
[06:48<15:47,  2.18it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.9e+6]Epoch 937/3000:  31%|███       | 936/3000 [06:48<15:47,  2.18it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.9e+6]Epoch 937/3000:  31%|███       | 937/3000 [06:48<15:52,  2.17it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.9e+6]Epoch 937/3000:  31%|███       | 937/3000 [06:48<15:52,  2.17it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.89e+6]Epoch 938/3000:  31%|███       | 937/3000 [06:48<15:52,  2.17it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.89e+6]Epoch 938/3000:  31%|███▏      | 938/3000 [06:49<15:32,  2.21it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.89e+6]Epoch 938/3000:  31%|███▏      | 938/3000 [06:49<15:32,  2.21it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.89e+6]Epoch 939/3000:  31%|███▏      | 938/3000 [06:49<15:32,  2.21it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.89e+6]Epoch 939/3000:  31%|███▏      | 939/3000 [06:49<16:03,  2.14it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.89e+6]Epoch 939/3000:  31%|███▏      | 939/3000 [06:49<16:03,  2.14it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.89e+6] Epoch 940/3000:  31%|███▏      | 939/3000 [06:49<16:03,  2.14it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.89e+6]Epoch 940/3000:  31%|███▏      | 940/3000 [06:50<15:30,  2.21it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.89e+6]Epoch 940/3000:  31%|███▏      | 940/3000 [06:50<15:30,  2.21it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.89e+6]Epoch 941/3000:  31%|███▏      | 940/3000 [06:50<15:30,  2.21it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.89e+6]Epoch 941/3000:  31%|███▏      | 941/3000 [06:50<15:55,  2.16it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.89e+6]Epoch 941/3000:  31%|███▏      | 941/3000 [06:50<15:55,  2.16it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=1.89e+6]Epoch 942/3000:  31%|███▏      | 
941/3000 [06:50<15:55,  2.16it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=1.89e+6]Epoch 942/3000:  31%|███▏      | 942/3000 [06:51<15:37,  2.19it/s, v_num=1, train_loss_step=1.98e+6, train_loss_epoch=1.89e+6]Epoch 942/3000:  31%|███▏      | 942/3000 [06:51<15:37,  2.19it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.89e+6]Epoch 943/3000:  31%|███▏      | 942/3000 [06:51<15:37,  2.19it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.89e+6]Epoch 943/3000:  31%|███▏      | 943/3000 [06:51<15:28,  2.21it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.89e+6]Epoch 943/3000:  31%|███▏      | 943/3000 [06:51<15:28,  2.21it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.89e+6]Epoch 944/3000:  31%|███▏      | 943/3000 [06:51<15:28,  2.21it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.89e+6]Epoch 944/3000:  31%|███▏      | 944/3000 [06:52<15:43,  2.18it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.89e+6]Epoch 944/3000:  31%|███▏      | 944/3000 [06:52<15:43,  2.18it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.89e+6]Epoch 945/3000:  31%|███▏      | 944/3000 [06:52<15:43,  2.18it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.89e+6]Epoch 945/3000:  32%|███▏      | 945/3000 [06:52<15:40,  2.19it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.89e+6]Epoch 945/3000:  32%|███▏      | 945/3000 [06:52<15:40,  2.19it/s, v_num=1, train_loss_step=1.95e+6, train_loss_epoch=1.89e+6]Epoch 946/3000:  32%|███▏      | 945/3000 [06:52<15:40,  2.19it/s, v_num=1, train_loss_step=1.95e+6, train_loss_epoch=1.89e+6]Epoch 946/3000:  32%|███▏      | 946/3000 [06:52<15:57,  2.15it/s, v_num=1, train_loss_step=1.95e+6, train_loss_epoch=1.89e+6]Epoch 946/3000:  32%|███▏      | 946/3000 [06:52<15:57,  2.15it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.89e+6]Epoch 947/3000:  32%|███▏      | 946/3000 [06:52<15:57,  2.15it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.89e+6]Epoch 947/3000:  
32%|███▏      | 947/3000 [06:53<16:01,  2.13it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.89e+6]Epoch 947/3000:  32%|███▏      | 947/3000 [06:53<16:01,  2.13it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.89e+6]Epoch 948/3000:  32%|███▏      | 947/3000 [06:53<16:01,  2.13it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.89e+6]Epoch 948/3000:  32%|███▏      | 948/3000 [06:53<16:00,  2.14it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.89e+6]Epoch 948/3000:  32%|███▏      | 948/3000 [06:53<16:00,  2.14it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.88e+6]Epoch 949/3000:  32%|███▏      | 948/3000 [06:53<16:00,  2.14it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.88e+6]Epoch 949/3000:  32%|███▏      | 949/3000 [06:54<15:36,  2.19it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.88e+6]Epoch 949/3000:  32%|███▏      | 949/3000 [06:54<15:36,  2.19it/s, v_num=1, train_loss_step=1.95e+6, train_loss_epoch=1.88e+6]Epoch 950/3000:  32%|███▏      | 949/3000 [06:54<15:36,  2.19it/s, v_num=1, train_loss_step=1.95e+6, train_loss_epoch=1.88e+6]Epoch 950/3000:  32%|███▏      | 950/3000 [06:54<15:05,  2.27it/s, v_num=1, train_loss_step=1.95e+6, train_loss_epoch=1.88e+6]Epoch 950/3000:  32%|███▏      | 950/3000 [06:54<15:05,  2.27it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.88e+6] Epoch 951/3000:  32%|███▏      | 950/3000 [06:54<15:05,  2.27it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.88e+6]Epoch 951/3000:  32%|███▏      | 951/3000 [06:55<14:53,  2.29it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.88e+6]Epoch 951/3000:  32%|███▏      | 951/3000 [06:55<14:53,  2.29it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.88e+6]Epoch 952/3000:  32%|███▏      | 951/3000 [06:55<14:53,  2.29it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.88e+6]Epoch 952/3000:  32%|███▏      | 952/3000 [06:55<15:20,  2.23it/s, v_num=1, train_loss_step=1.91e+6, 
train_loss_epoch=1.88e+6]Epoch 952/3000:  32%|███▏      | 952/3000 [06:55<15:20,  2.23it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.88e+6]Epoch 953/3000:  32%|███▏      | 952/3000 [06:55<15:20,  2.23it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.88e+6]Epoch 953/3000:  32%|███▏      | 953/3000 [06:56<15:05,  2.26it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.88e+6]Epoch 953/3000:  32%|███▏      | 953/3000 [06:56<15:05,  2.26it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.88e+6] Epoch 954/3000:  32%|███▏      | 953/3000 [06:56<15:05,  2.26it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.88e+6]Epoch 954/3000:  32%|███▏      | 954/3000 [06:56<15:19,  2.23it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.88e+6]Epoch 954/3000:  32%|███▏      | 954/3000 [06:56<15:19,  2.23it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=1.88e+6]  Epoch 955/3000:  32%|███▏      | 954/3000 [06:56<15:19,  2.23it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=1.88e+6]Epoch 955/3000:  32%|███▏      | 955/3000 [06:56<15:11,  2.24it/s, v_num=1, train_loss_step=2e+6, train_loss_epoch=1.88e+6]Epoch 955/3000:  32%|███▏      | 955/3000 [06:56<15:11,  2.24it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.88e+6]Epoch 956/3000:  32%|███▏      | 955/3000 [06:56<15:11,  2.24it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.88e+6]Epoch 956/3000:  32%|███▏      | 956/3000 [06:57<15:43,  2.17it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.88e+6]Epoch 956/3000:  32%|███▏      | 956/3000 [06:57<15:43,  2.17it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.88e+6]Epoch 957/3000:  32%|███▏      | 956/3000 [06:57<15:43,  2.17it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.88e+6]Epoch 957/3000:  32%|███▏      | 957/3000 [06:58<18:05,  1.88it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.88e+6]Epoch 957/3000:  32%|███▏      | 957/3000 [06:58<18:05,  1.88it/s, v_num=1, 
train_loss_step=1.8e+6, train_loss_epoch=1.88e+6] Epoch 958/3000:  32%|███▏      | 957/3000 [06:58<18:05,  1.88it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.88e+6]Epoch 958/3000:  32%|███▏      | 958/3000 [06:58<18:24,  1.85it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.88e+6]Epoch 958/3000:  32%|███▏      | 958/3000 [06:58<18:24,  1.85it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.88e+6]Epoch 959/3000:  32%|███▏      | 958/3000 [06:58<18:24,  1.85it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.88e+6]Epoch 959/3000:  32%|███▏      | 959/3000 [06:59<19:38,  1.73it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.88e+6]Epoch 959/3000:  32%|███▏      | 959/3000 [06:59<19:38,  1.73it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.87e+6]Epoch 960/3000:  32%|███▏      | 959/3000 [06:59<19:38,  1.73it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.87e+6]Epoch 960/3000:  32%|███▏      | 960/3000 [06:59<18:59,  1.79it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.87e+6]Epoch 960/3000:  32%|███▏      | 960/3000 [06:59<18:59,  1.79it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.87e+6]Epoch 961/3000:  32%|███▏      | 960/3000 [06:59<18:59,  1.79it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.87e+6]Epoch 961/3000:  32%|███▏      | 961/3000 [07:00<18:07,  1.88it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.87e+6]Epoch 961/3000:  32%|███▏      | 961/3000 [07:00<18:07,  1.88it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.87e+6]Epoch 962/3000:  32%|███▏      | 961/3000 [07:00<18:07,  1.88it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.87e+6]Epoch 962/3000:  32%|███▏      | 962/3000 [07:00<17:51,  1.90it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.87e+6]Epoch 962/3000:  32%|███▏      | 962/3000 [07:00<17:51,  1.90it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.87e+6]Epoch 963/3000:  32%|███▏      | 962/3000 [07:00<17:51,  
1.90it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.87e+6]Epoch 963/3000:  32%|███▏      | 963/3000 [07:01<18:58,  1.79it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.87e+6]Epoch 963/3000:  32%|███▏      | 963/3000 [07:01<18:58,  1.79it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.87e+6]Epoch 964/3000:  32%|███▏      | 963/3000 [07:01<18:58,  1.79it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.87e+6]Epoch 964/3000:  32%|███▏      | 964/3000 [07:02<19:56,  1.70it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.87e+6]Epoch 964/3000:  32%|███▏      | 964/3000 [07:02<19:56,  1.70it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.87e+6]Epoch 965/3000:  32%|███▏      | 964/3000 [07:02<19:56,  1.70it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.87e+6]Epoch 965/3000:  32%|███▏      | 965/3000 [07:02<18:50,  1.80it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.87e+6]Epoch 965/3000:  32%|███▏      | 965/3000 [07:02<18:50,  1.80it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.87e+6]Epoch 966/3000:  32%|███▏      | 965/3000 [07:02<18:50,  1.80it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.87e+6]Epoch 966/3000:  32%|███▏      | 966/3000 [07:03<19:42,  1.72it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.87e+6]Epoch 966/3000:  32%|███▏      | 966/3000 [07:03<19:42,  1.72it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.87e+6]Epoch 967/3000:  32%|███▏      | 966/3000 [07:03<19:42,  1.72it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.87e+6]Epoch 967/3000:  32%|███▏      | 967/3000 [07:03<19:45,  1.72it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.87e+6]Epoch 967/3000:  32%|███▏      | 967/3000 [07:03<19:45,  1.72it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.87e+6]Epoch 968/3000:  32%|███▏      | 967/3000 [07:03<19:45,  1.72it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.87e+6]Epoch 968/3000:  32%|███▏      | 
968/3000 [07:04<18:20,  1.85it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.87e+6]Epoch 968/3000:  32%|███▏      | 968/3000 [07:04<18:20,  1.85it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.87e+6]Epoch 969/3000:  32%|███▏      | 968/3000 [07:04<18:20,  1.85it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.87e+6]Epoch 969/3000:  32%|███▏      | 969/3000 [07:05<19:37,  1.72it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.87e+6]Epoch 969/3000:  32%|███▏      | 969/3000 [07:05<19:37,  1.72it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.87e+6]Epoch 970/3000:  32%|███▏      | 969/3000 [07:05<19:37,  1.72it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.87e+6]Epoch 970/3000:  32%|███▏      | 970/3000 [07:05<19:24,  1.74it/s, v_num=1, train_loss_step=1.94e+6, train_loss_epoch=1.87e+6]Epoch 970/3000:  32%|███▏      | 970/3000 [07:05<19:24,  1.74it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.86e+6]Epoch 971/3000:  32%|███▏      | 970/3000 [07:05<19:24,  1.74it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.86e+6]Epoch 971/3000:  32%|███▏      | 971/3000 [07:06<18:26,  1.83it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.86e+6]Epoch 971/3000:  32%|███▏      | 971/3000 [07:06<18:26,  1.83it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=1.86e+6]Epoch 972/3000:  32%|███▏      | 971/3000 [07:06<18:26,  1.83it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=1.86e+6]Epoch 972/3000:  32%|███▏      | 972/3000 [07:06<19:24,  1.74it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=1.86e+6]Epoch 972/3000:  32%|███▏      | 972/3000 [07:06<19:24,  1.74it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.86e+6]Epoch 973/3000:  32%|███▏      | 972/3000 [07:06<19:24,  1.74it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.86e+6]Epoch 973/3000:  32%|███▏      | 973/3000 [07:07<19:52,  1.70it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.86e+6]Epoch 973/3000:  
32%|███▏      | 973/3000 [07:07<19:52,  1.70it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.86e+6]Epoch 974/3000:  32%|███▏      | 973/3000 [07:07<19:52,  1.70it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.86e+6]Epoch 974/3000:  32%|███▏      | 974/3000 [07:07<19:50,  1.70it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.86e+6]Epoch 974/3000:  32%|███▏      | 974/3000 [07:07<19:50,  1.70it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.86e+6]Epoch 975/3000:  32%|███▏      | 974/3000 [07:07<19:50,  1.70it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.86e+6]Epoch 975/3000:  32%|███▎      | 975/3000 [07:08<19:36,  1.72it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.86e+6]Epoch 975/3000:  32%|███▎      | 975/3000 [07:08<19:36,  1.72it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.86e+6]Epoch 976/3000:  32%|███▎      | 975/3000 [07:08<19:36,  1.72it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.86e+6]Epoch 976/3000:  33%|███▎      | 976/3000 [07:08<18:38,  1.81it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.86e+6]Epoch 976/3000:  33%|███▎      | 976/3000 [07:08<18:38,  1.81it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.86e+6]Epoch 977/3000:  33%|███▎      | 976/3000 [07:08<18:38,  1.81it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.86e+6]Epoch 977/3000:  33%|███▎      | 977/3000 [07:09<18:21,  1.84it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.86e+6]Epoch 977/3000:  33%|███▎      | 977/3000 [07:09<18:21,  1.84it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.86e+6]Epoch 978/3000:  33%|███▎      | 977/3000 [07:09<18:21,  1.84it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.86e+6]Epoch 978/3000:  33%|███▎      | 978/3000 [07:10<18:28,  1.82it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.86e+6]Epoch 978/3000:  33%|███▎      | 978/3000 [07:10<18:28,  1.82it/s, v_num=1, train_loss_step=1.92e+6, 
train_loss_epoch=1.86e+6]Epoch 979/3000:  33%|███▎      | 978/3000 [07:10<18:28,  1.82it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.86e+6]Epoch 979/3000:  33%|███▎      | 979/3000 [07:10<18:23,  1.83it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.86e+6]Epoch 979/3000:  33%|███▎      | 979/3000 [07:10<18:23,  1.83it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.86e+6]Epoch 980/3000:  33%|███▎      | 979/3000 [07:10<18:23,  1.83it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.86e+6]Epoch 980/3000:  33%|███▎      | 980/3000 [07:11<19:33,  1.72it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.86e+6]Epoch 980/3000:  33%|███▎      | 980/3000 [07:11<19:33,  1.72it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.86e+6]Epoch 981/3000:  33%|███▎      | 980/3000 [07:11<19:33,  1.72it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.86e+6]Epoch 981/3000:  33%|███▎      | 981/3000 [07:11<19:14,  1.75it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.86e+6]Epoch 981/3000:  33%|███▎      | 981/3000 [07:11<19:14,  1.75it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.85e+6] Epoch 982/3000:  33%|███▎      | 981/3000 [07:11<19:14,  1.75it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.85e+6]Epoch 982/3000:  33%|███▎      | 982/3000 [07:12<19:59,  1.68it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.85e+6]Epoch 982/3000:  33%|███▎      | 982/3000 [07:12<19:59,  1.68it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.85e+6]Epoch 983/3000:  33%|███▎      | 982/3000 [07:12<19:59,  1.68it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.85e+6]Epoch 983/3000:  33%|███▎      | 983/3000 [07:13<20:57,  1.60it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.85e+6]Epoch 983/3000:  33%|███▎      | 983/3000 [07:13<20:57,  1.60it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.85e+6]Epoch 984/3000:  33%|███▎      | 983/3000 [07:13<20:57,  1.60it/s, v_num=1, 
train_loss_step=1.92e+6, train_loss_epoch=1.85e+6]Epoch 984/3000:  33%|███▎      | 984/3000 [07:13<20:54,  1.61it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.85e+6]Epoch 984/3000:  33%|███▎      | 984/3000 [07:13<20:54,  1.61it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.85e+6]Epoch 985/3000:  33%|███▎      | 984/3000 [07:13<20:54,  1.61it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.85e+6]Epoch 985/3000:  33%|███▎      | 985/3000 [07:14<19:50,  1.69it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.85e+6]Epoch 985/3000:  33%|███▎      | 985/3000 [07:14<19:50,  1.69it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.85e+6]Epoch 986/3000:  33%|███▎      | 985/3000 [07:14<19:50,  1.69it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.85e+6]Epoch 986/3000:  33%|███▎      | 986/3000 [07:14<19:21,  1.73it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.85e+6]Epoch 986/3000:  33%|███▎      | 986/3000 [07:14<19:21,  1.73it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.85e+6]Epoch 987/3000:  33%|███▎      | 986/3000 [07:14<19:21,  1.73it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.85e+6]Epoch 987/3000:  33%|███▎      | 987/3000 [07:15<19:19,  1.74it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.85e+6]Epoch 987/3000:  33%|███▎      | 987/3000 [07:15<19:19,  1.74it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.85e+6]Epoch 988/3000:  33%|███▎      | 987/3000 [07:15<19:19,  1.74it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.85e+6]Epoch 988/3000:  33%|███▎      | 988/3000 [07:16<23:14,  1.44it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.85e+6]Epoch 988/3000:  33%|███▎      | 988/3000 [07:16<23:14,  1.44it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.85e+6] Epoch 989/3000:  33%|███▎      | 988/3000 [07:16<23:14,  1.44it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.85e+6]Epoch 989/3000:  33%|███▎      | 989/3000 [07:17<24:04,  
1.39it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.85e+6]Epoch 989/3000:  33%|███▎      | 989/3000 [07:17<24:04,  1.39it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.85e+6]Epoch 990/3000:  33%|███▎      | 989/3000 [07:17<24:04,  1.39it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.85e+6]Epoch 990/3000:  33%|███▎      | 990/3000 [07:17<21:39,  1.55it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.85e+6]Epoch 990/3000:  33%|███▎      | 990/3000 [07:17<21:39,  1.55it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.85e+6]Epoch 991/3000:  33%|███▎      | 990/3000 [07:17<21:39,  1.55it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.85e+6]Epoch 991/3000:  33%|███▎      | 991/3000 [07:18<20:03,  1.67it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.85e+6]Epoch 991/3000:  33%|███▎      | 991/3000 [07:18<20:03,  1.67it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.85e+6]Epoch 992/3000:  33%|███▎      | 991/3000 [07:18<20:03,  1.67it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.85e+6]Epoch 992/3000:  33%|███▎      | 992/3000 [07:18<18:48,  1.78it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.85e+6]Epoch 992/3000:  33%|███▎      | 992/3000 [07:18<18:48,  1.78it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.85e+6] Epoch 993/3000:  33%|███▎      | 992/3000 [07:18<18:48,  1.78it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.85e+6]Epoch 993/3000:  33%|███▎      | 993/3000 [07:19<18:59,  1.76it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.85e+6]Epoch 993/3000:  33%|███▎      | 993/3000 [07:19<18:59,  1.76it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.84e+6]Epoch 994/3000:  33%|███▎      | 993/3000 [07:19<18:59,  1.76it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.84e+6]Epoch 994/3000:  33%|███▎      | 994/3000 [07:19<18:01,  1.86it/s, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.84e+6]Epoch 994/3000:  33%|███▎      | 994/3000 
[07:19<18:01,  1.86it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.84e+6]Epoch 995/3000:  33%|███▎      | 994/3000 [07:19<18:01,  1.86it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.84e+6]Epoch 995/3000:  33%|███▎      | 995/3000 [07:20<16:42,  2.00it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.84e+6]Epoch 995/3000:  33%|███▎      | 995/3000 [07:20<16:42,  2.00it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.84e+6] Epoch 996/3000:  33%|███▎      | 995/3000 [07:20<16:42,  2.00it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.84e+6]Epoch 996/3000:  33%|███▎      | 996/3000 [07:20<16:47,  1.99it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.84e+6]Epoch 996/3000:  33%|███▎      | 996/3000 [07:20<16:47,  1.99it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.84e+6]Epoch 997/3000:  33%|███▎      | 996/3000 [07:20<16:47,  1.99it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.84e+6]Epoch 997/3000:  33%|███▎      | 997/3000 [07:21<16:50,  1.98it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.84e+6]Epoch 997/3000:  33%|███▎      | 997/3000 [07:21<16:50,  1.98it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.84e+6]Epoch 998/3000:  33%|███▎      | 997/3000 [07:21<16:50,  1.98it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.84e+6]Epoch 998/3000:  33%|███▎      | 998/3000 [07:21<16:25,  2.03it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.84e+6]Epoch 998/3000:  33%|███▎      | 998/3000 [07:21<16:25,  2.03it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.84e+6]Epoch 999/3000:  33%|███▎      | 998/3000 [07:21<16:25,  2.03it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.84e+6]Epoch 999/3000:  33%|███▎      | 999/3000 [07:22<16:40,  2.00it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.84e+6]Epoch 999/3000:  33%|███▎      | 999/3000 [07:22<16:40,  2.00it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.84e+6]Epoch 1000/3000:  33%|███▎  
    | 999/3000 [07:22<16:40,  2.00it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.84e+6]Epoch 1000/3000:  33%|███▎      | 1000/3000 [07:22<16:25,  2.03it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.84e+6]Epoch 1000/3000:  33%|███▎      | 1000/3000 [07:22<16:25,  2.03it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.84e+6]Epoch 1001/3000:  33%|███▎      | 1000/3000 [07:22<16:25,  2.03it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.84e+6]Epoch 1001/3000:  33%|███▎      | 1001/3000 [07:22<15:51,  2.10it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.84e+6]Epoch 1001/3000:  33%|███▎      | 1001/3000 [07:22<15:51,  2.10it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.84e+6]Epoch 1002/3000:  33%|███▎      | 1001/3000 [07:22<15:51,  2.10it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.84e+6]Epoch 1002/3000:  33%|███▎      | 1002/3000 [07:23<15:46,  2.11it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.84e+6]Epoch 1002/3000:  33%|███▎      | 1002/3000 [07:23<15:46,  2.11it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.84e+6] Epoch 1003/3000:  33%|███▎      | 1002/3000 [07:23<15:46,  2.11it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.84e+6]Epoch 1003/3000:  33%|███▎      | 1003/3000 [07:23<15:46,  2.11it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.84e+6]Epoch 1003/3000:  33%|███▎      | 1003/3000 [07:23<15:46,  2.11it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.84e+6]Epoch 1004/3000:  33%|███▎      | 1003/3000 [07:23<15:46,  2.11it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.84e+6]Epoch 1004/3000:  33%|███▎      | 1004/3000 [07:24<16:09,  2.06it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.84e+6]Epoch 1004/3000:  33%|███▎      | 1004/3000 [07:24<16:09,  2.06it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.84e+6]Epoch 1005/3000:  33%|███▎      | 1004/3000 [07:24<16:09,  2.06it/s, v_num=1, train_loss_step=1.78e+6, 
train_loss_epoch=1.84e+6]Epoch 1005/3000:  34%|███▎      | 1005/3000 [07:24<16:50,  1.97it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.84e+6]Epoch 1005/3000:  34%|███▎      | 1005/3000 [07:24<16:50,  1.97it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.83e+6]Epoch 1006/3000:  34%|███▎      | 1005/3000 [07:24<16:50,  1.97it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.83e+6]Epoch 1006/3000:  34%|███▎      | 1006/3000 [07:25<16:38,  2.00it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.83e+6]Epoch 1006/3000:  34%|███▎      | 1006/3000 [07:25<16:38,  2.00it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.83e+6]Epoch 1007/3000:  34%|███▎      | 1006/3000 [07:25<16:38,  2.00it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.83e+6]Epoch 1007/3000:  34%|███▎      | 1007/3000 [07:25<16:09,  2.05it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.83e+6]Epoch 1007/3000:  34%|███▎      | 1007/3000 [07:25<16:09,  2.05it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.83e+6]Epoch 1008/3000:  34%|███▎      | 1007/3000 [07:25<16:09,  2.05it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.83e+6]Epoch 1008/3000:  34%|███▎      | 1008/3000 [07:26<16:18,  2.03it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.83e+6]Epoch 1008/3000:  34%|███▎      | 1008/3000 [07:26<16:18,  2.03it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.83e+6] Epoch 1009/3000:  34%|███▎      | 1008/3000 [07:26<16:18,  2.03it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.83e+6]Epoch 1009/3000:  34%|███▎      | 1009/3000 [07:26<16:11,  2.05it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.83e+6]Epoch 1009/3000:  34%|███▎      | 1009/3000 [07:26<16:11,  2.05it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.83e+6]Epoch 1010/3000:  34%|███▎      | 1009/3000 [07:26<16:11,  2.05it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.83e+6]Epoch 1010/3000:  34%|███▎      | 1010/3000 
[07:27<15:49,  2.10it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.83e+6]Epoch 1010/3000:  34%|███▎      | 1010/3000 [07:27<15:49,  2.10it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.83e+6]Epoch 1011/3000:  34%|███▎      | 1010/3000 [07:27<15:49,  2.10it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.83e+6]Epoch 1011/3000:  34%|███▎      | 1011/3000 [07:27<16:19,  2.03it/s, v_num=1, train_loss_step=1.91e+6, train_loss_epoch=1.83e+6]Epoch 1011/3000:  34%|███▎      | 1011/3000 [07:27<16:19,  2.03it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.83e+6]Epoch 1012/3000:  34%|███▎      | 1011/3000 [07:27<16:19,  2.03it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.83e+6]Epoch 1012/3000:  34%|███▎      | 1012/3000 [07:28<16:09,  2.05it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.83e+6]Epoch 1012/3000:  34%|███▎      | 1012/3000 [07:28<16:09,  2.05it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.83e+6]Epoch 1013/3000:  34%|███▎      | 1012/3000 [07:28<16:09,  2.05it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.83e+6]Epoch 1013/3000:  34%|███▍      | 1013/3000 [07:28<15:39,  2.11it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.83e+6]Epoch 1013/3000:  34%|███▍      | 1013/3000 [07:28<15:39,  2.11it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.83e+6]Epoch 1014/3000:  34%|███▍      | 1013/3000 [07:28<15:39,  2.11it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.83e+6]Epoch 1014/3000:  34%|███▍      | 1014/3000 [07:29<15:24,  2.15it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.83e+6]Epoch 1014/3000:  34%|███▍      | 1014/3000 [07:29<15:24,  2.15it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.83e+6]Epoch 1015/3000:  34%|███▍      | 1014/3000 [07:29<15:24,  2.15it/s, v_num=1, train_loss_step=1.88e+6, train_loss_epoch=1.83e+6]Epoch 1015/3000:  34%|███▍      | 1015/3000 [07:29<16:07,  2.05it/s, v_num=1, train_loss_step=1.88e+6, 
train_loss_epoch=1.83e+6]Epoch 1015/3000:  34%|███▍      | 1015/3000 [07:29<16:07,  2.05it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.83e+6]Epoch 1016/3000:  34%|███▍      | 1015/3000 [07:29<16:07,  2.05it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.83e+6]Epoch 1016/3000:  34%|███▍      | 1016/3000 [07:30<16:21,  2.02it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.83e+6]Epoch 1016/3000:  34%|███▍      | 1016/3000 [07:30<16:21,  2.02it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.83e+6]Epoch 1017/3000:  34%|███▍      | 1016/3000 [07:30<16:21,  2.02it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.83e+6]Epoch 1017/3000:  34%|███▍      | 1017/3000 [07:30<15:37,  2.12it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.83e+6]Epoch 1017/3000:  34%|███▍      | 1017/3000 [07:30<15:37,  2.12it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.82e+6]Epoch 1018/3000:  34%|███▍      | 1017/3000 [07:30<15:37,  2.12it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.82e+6]Epoch 1018/3000:  34%|███▍      | 1018/3000 [07:31<16:08,  2.05it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.82e+6]Epoch 1018/3000:  34%|███▍      | 1018/3000 [07:31<16:08,  2.05it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=1.82e+6]Epoch 1019/3000:  34%|███▍      | 1018/3000 [07:31<16:08,  2.05it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=1.82e+6]Epoch 1019/3000:  34%|███▍      | 1019/3000 [07:31<16:08,  2.05it/s, v_num=1, train_loss_step=1.96e+6, train_loss_epoch=1.82e+6]Epoch 1019/3000:  34%|███▍      | 1019/3000 [07:31<16:08,  2.05it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.82e+6]Epoch 1020/3000:  34%|███▍      | 1019/3000 [07:31<16:08,  2.05it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.82e+6]Epoch 1020/3000:  34%|███▍      | 1020/3000 [07:32<16:13,  2.03it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.82e+6]Epoch 1020/3000:  34%|███▍      | 1020/3000 
[07:32<16:13,  2.03it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.82e+6]Epoch 1021/3000:  34%|███▍      | 1020/3000 [07:32<16:13,  2.03it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.82e+6]Epoch 1021/3000:  34%|███▍      | 1021/3000 [07:32<16:08,  2.04it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.82e+6]Epoch 1021/3000:  34%|███▍      | 1021/3000 [07:32<16:08,  2.04it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.82e+6]Epoch 1022/3000:  34%|███▍      | 1021/3000 [07:32<16:08,  2.04it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.82e+6]Epoch 1022/3000:  34%|███▍      | 1022/3000 [07:33<16:41,  1.98it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.82e+6]Epoch 1022/3000:  34%|███▍      | 1022/3000 [07:33<16:41,  1.98it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.82e+6]Epoch 1023/3000:  34%|███▍      | 1022/3000 [07:33<16:41,  1.98it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.82e+6]Epoch 1023/3000:  34%|███▍      | 1023/3000 [07:33<16:40,  1.98it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.82e+6]Epoch 1023/3000:  34%|███▍      | 1023/3000 [07:33<16:40,  1.98it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.82e+6]Epoch 1024/3000:  34%|███▍      | 1023/3000 [07:33<16:40,  1.98it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.82e+6]Epoch 1024/3000:  34%|███▍      | 1024/3000 [07:34<16:22,  2.01it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.82e+6]Epoch 1024/3000:  34%|███▍      | 1024/3000 [07:34<16:22,  2.01it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.82e+6]Epoch 1025/3000:  34%|███▍      | 1024/3000 [07:34<16:22,  2.01it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.82e+6]Epoch 1025/3000:  34%|███▍      | 1025/3000 [07:34<16:24,  2.01it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.82e+6]Epoch 1025/3000:  34%|███▍      | 1025/3000 [07:34<16:24,  2.01it/s, v_num=1, train_loss_step=1.89e+6, 
train_loss_epoch=1.82e+6]Epoch 1026/3000:  34%|███▍      | 1025/3000 [07:34<16:24,  2.01it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=1.82e+6]Epoch 1026/3000:  34%|███▍      | 1026/3000 [07:35<16:07,  2.04it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=1.82e+6]Epoch 1026/3000:  34%|███▍      | 1026/3000 [07:35<16:07,  2.04it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.82e+6]Epoch 1027/3000:  34%|███▍      | 1026/3000 [07:35<16:07,  2.04it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.82e+6]Epoch 1027/3000:  34%|███▍      | 1027/3000 [07:35<15:44,  2.09it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.82e+6]Epoch 1027/3000:  34%|███▍      | 1027/3000 [07:35<15:44,  2.09it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.82e+6] Epoch 1028/3000:  34%|███▍      | 1027/3000 [07:35<15:44,  2.09it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.82e+6]Epoch 1028/3000:  34%|███▍      | 1028/3000 [07:36<16:02,  2.05it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.82e+6]Epoch 1028/3000:  34%|███▍      | 1028/3000 [07:36<16:02,  2.05it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.82e+6]Epoch 1029/3000:  34%|███▍      | 1028/3000 [07:36<16:02,  2.05it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.82e+6]Epoch 1029/3000:  34%|███▍      | 1029/3000 [07:36<16:41,  1.97it/s, v_num=1, train_loss_step=1.92e+6, train_loss_epoch=1.82e+6]Epoch 1029/3000:  34%|███▍      | 1029/3000 [07:36<16:41,  1.97it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.82e+6]Epoch 1030/3000:  34%|███▍      | 1029/3000 [07:36<16:41,  1.97it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.82e+6]Epoch 1030/3000:  34%|███▍      | 1030/3000 [07:37<16:21,  2.01it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.82e+6]Epoch 1030/3000:  34%|███▍      | 1030/3000 [07:37<16:21,  2.01it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.81e+6]Epoch 1031/3000:  34%|███▍      | 1030/3000 
[07:37<16:21,  2.01it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.81e+6]Epoch 1031/3000:  34%|███▍      | 1031/3000 [07:37<16:36,  1.98it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.81e+6]Epoch 1031/3000:  34%|███▍      | 1031/3000 [07:37<16:36,  1.98it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.81e+6]Epoch 1032/3000:  34%|███▍      | 1031/3000 [07:37<16:36,  1.98it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.81e+6]Epoch 1032/3000:  34%|███▍      | 1032/3000 [07:38<16:25,  2.00it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.81e+6]Epoch 1032/3000:  34%|███▍      | 1032/3000 [07:38<16:25,  2.00it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.81e+6]Epoch 1033/3000:  34%|███▍      | 1032/3000 [07:38<16:25,  2.00it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.81e+6]Epoch 1033/3000:  34%|███▍      | 1033/3000 [07:38<15:11,  2.16it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.81e+6]Epoch 1033/3000:  34%|███▍      | 1033/3000 [07:38<15:11,  2.16it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.81e+6]Epoch 1034/3000:  34%|███▍      | 1033/3000 [07:38<15:11,  2.16it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.81e+6]Epoch 1034/3000:  34%|███▍      | 1034/3000 [07:39<15:29,  2.12it/s, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.81e+6]Epoch 1034/3000:  34%|███▍      | 1034/3000 [07:39<15:29,  2.12it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.81e+6]Epoch 1035/3000:  34%|███▍      | 1034/3000 [07:39<15:29,  2.12it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.81e+6]Epoch 1035/3000:  34%|███▍      | 1035/3000 [07:39<15:22,  2.13it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.81e+6]Epoch 1035/3000:  34%|███▍      | 1035/3000 [07:39<15:22,  2.13it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.81e+6]Epoch 1036/3000:  34%|███▍      | 1035/3000 [07:39<15:22,  2.13it/s, v_num=1, train_loss_step=1.83e+6, 
train_loss_epoch=1.81e+6]Epoch 1036/3000:  35%|███▍      | 1036/3000 [07:40<15:55,  2.06it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.81e+6]Epoch 1036/3000:  35%|███▍      | 1036/3000 [07:40<15:55,  2.06it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.81e+6] Epoch 1037/3000:  35%|███▍      | 1036/3000 [07:40<15:55,  2.06it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.81e+6]Epoch 1037/3000:  35%|███▍      | 1037/3000 [07:40<15:17,  2.14it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.81e+6]Epoch 1037/3000:  35%|███▍      | 1037/3000 [07:40<15:17,  2.14it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.81e+6]Epoch 1038/3000:  35%|███▍      | 1037/3000 [07:40<15:17,  2.14it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.81e+6]Epoch 1038/3000:  35%|███▍      | 1038/3000 [07:40<15:11,  2.15it/s, v_num=1, train_loss_step=1.9e+6, train_loss_epoch=1.81e+6]Epoch 1038/3000:  35%|███▍      | 1038/3000 [07:40<15:11,  2.15it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.81e+6]Epoch 1039/3000:  35%|███▍      | 1038/3000 [07:40<15:11,  2.15it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.81e+6]Epoch 1039/3000:  35%|███▍      | 1039/3000 [10:51<31:18:45, 57.48s/it, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.81e+6]Epoch 1039/3000:  35%|███▍      | 1039/3000 [10:51<31:18:45, 57.48s/it, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.81e+6]Epoch 1040/3000:  35%|███▍      | 1039/3000 [10:51<31:18:45, 57.48s/it, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.81e+6]Epoch 1040/3000:  35%|███▍      | 1040/3000 [10:51<21:58:57, 40.38s/it, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.81e+6]Epoch 1040/3000:  35%|███▍      | 1040/3000 [10:51<21:58:57, 40.38s/it, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.81e+6] Epoch 1041/3000:  35%|███▍      | 1040/3000 [10:51<21:58:57, 40.38s/it, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.81e+6]Epoch 1041/3000:  35%|███▍      | 
1041/3000 [10:52<15:27:36, 28.41s/it, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.81e+6]Epoch 1041/3000:  35%|███▍      | 1041/3000 [10:52<15:27:36, 28.41s/it, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.81e+6]Epoch 1042/3000:  35%|███▍      | 1041/3000 [10:52<15:27:36, 28.41s/it, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.81e+6]Epoch 1042/3000:  35%|███▍      | 1042/3000 [10:52<10:54:08, 20.05s/it, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.81e+6]Epoch 1042/3000:  35%|███▍      | 1042/3000 [10:52<10:54:08, 20.05s/it, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.81e+6]Epoch 1043/3000:  35%|███▍      | 1042/3000 [10:52<10:54:08, 20.05s/it, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.81e+6]Epoch 1043/3000:  35%|███▍      | 1043/3000 [10:53<7:42:12, 14.17s/it, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.81e+6] Epoch 1043/3000:  35%|███▍      | 1043/3000 [10:53<7:42:12, 14.17s/it, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.8e+6] Epoch 1044/3000:  35%|███▍      | 1043/3000 [10:53<7:42:12, 14.17s/it, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.8e+6]Epoch 1044/3000:  35%|███▍      | 1044/3000 [10:53<5:27:48, 10.06s/it, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.8e+6]Epoch 1044/3000:  35%|███▍      | 1044/3000 [10:53<5:27:48, 10.06s/it, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.8e+6]Epoch 1045/3000:  35%|███▍      | 1044/3000 [10:53<5:27:48, 10.06s/it, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.8e+6]Epoch 1045/3000:  35%|███▍      | 1045/3000 [10:54<3:54:16,  7.19s/it, v_num=1, train_loss_step=1.87e+6, train_loss_epoch=1.8e+6]Epoch 1045/3000:  35%|███▍      | 1045/3000 [10:54<3:54:16,  7.19s/it, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.8e+6]Epoch 1046/3000:  35%|███▍      | 1045/3000 [10:54<3:54:16,  7.19s/it, v_num=1, train_loss_step=1.93e+6, train_loss_epoch=1.8e+6]Epoch 1046/3000:  35%|███▍      | 1046/3000 [10:54<2:48:32,  5.18s/it, v_num=1, 
train_loss_step=1.93e+6, train_loss_epoch=1.8e+6]Epoch 1046/3000:  35%|███▍      | 1046/3000 [10:54<2:48:32,  5.18s/it, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.8e+6]Epoch 1047/3000:  35%|███▍      | 1046/3000 [10:54<2:48:32,  5.18s/it, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.8e+6]Epoch 1047/3000:  35%|███▍      | 1047/3000 [10:55<2:02:27,  3.76s/it, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.8e+6]Epoch 1047/3000:  35%|███▍      | 1047/3000 [10:55<2:02:27,  3.76s/it, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.8e+6]Epoch 1048/3000:  35%|███▍      | 1047/3000 [10:55<2:02:27,  3.76s/it, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.8e+6]Epoch 1048/3000:  35%|███▍      | 1048/3000 [10:55<1:30:18,  2.78s/it, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.8e+6]Epoch 1048/3000:  35%|███▍      | 1048/3000 [10:55<1:30:18,  2.78s/it, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.8e+6]Epoch 1049/3000:  35%|███▍      | 1048/3000 [10:55<1:30:18,  2.78s/it, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.8e+6]Epoch 1049/3000:  35%|███▍      | 1049/3000 [10:56<1:08:34,  2.11s/it, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.8e+6]Epoch 1049/3000:  35%|███▍      | 1049/3000 [10:56<1:08:34,  2.11s/it, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.8e+6]Epoch 1050/3000:  35%|███▍      | 1049/3000 [10:56<1:08:34,  2.11s/it, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.8e+6]Epoch 1050/3000:  35%|███▌      | 1050/3000 [10:56<53:01,  1.63s/it, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.8e+6]  Epoch 1050/3000:  35%|███▌      | 1050/3000 [10:56<53:01,  1.63s/it, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.8e+6]Epoch 1051/3000:  35%|███▌      | 1050/3000 [10:56<53:01,  1.63s/it, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.8e+6]Epoch 1051/3000:  35%|███▌      | 1051/3000 [10:57<42:18,  1.30s/it, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.8e+6]Epoch 1051/3000:  
35%|███▌      | 1051/3000 [10:57<42:18,  1.30s/it, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.8e+6] Epoch 1052/3000:  35%|███▌      | 1051/3000 [10:57<42:18,  1.30s/it, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.8e+6]Epoch 1052/3000:  35%|███▌      | 1052/3000 [10:57<35:37,  1.10s/it, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.8e+6]Epoch 1052/3000:  35%|███▌      | 1052/3000 [10:57<35:37,  1.10s/it, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.8e+6]Epoch 1053/3000:  35%|███▌      | 1052/3000 [10:57<35:37,  1.10s/it, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.8e+6]Epoch 1053/3000:  35%|███▌      | 1053/3000 [10:58<29:23,  1.10it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.8e+6]Epoch 1053/3000:  35%|███▌      | 1053/3000 [10:58<29:23,  1.10it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.8e+6]Epoch 1054/3000:  35%|███▌      | 1053/3000 [10:58<29:23,  1.10it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.8e+6]Epoch 1054/3000:  35%|███▌      | 1054/3000 [10:58<25:45,  1.26it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.8e+6]Epoch 1054/3000:  35%|███▌      | 1054/3000 [10:58<25:45,  1.26it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.8e+6]Epoch 1055/3000:  35%|███▌      | 1054/3000 [10:58<25:45,  1.26it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.8e+6]Epoch 1055/3000:  35%|███▌      | 1055/3000 [10:59<22:20,  1.45it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.8e+6]Epoch 1055/3000:  35%|███▌      | 1055/3000 [10:59<22:20,  1.45it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.8e+6]Epoch 1056/3000:  35%|███▌      | 1055/3000 [10:59<22:20,  1.45it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.8e+6]Epoch 1056/3000:  35%|███▌      | 1056/3000 [10:59<19:55,  1.63it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.8e+6]Epoch 1056/3000:  35%|███▌      | 1056/3000 [10:59<19:55,  1.63it/s, v_num=1, train_loss_step=1.79e+6, 
train_loss_epoch=1.79e+6]Epoch 1057/3000:  35%|███▌      | 1056/3000 [10:59<19:55,  1.63it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.79e+6]Epoch 1057/3000:  35%|███▌      | 1057/3000 [11:00<18:51,  1.72it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.79e+6]Epoch 1057/3000:  35%|███▌      | 1057/3000 [11:00<18:51,  1.72it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.79e+6]Epoch 1058/3000:  35%|███▌      | 1057/3000 [11:00<18:51,  1.72it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.79e+6]Epoch 1058/3000:  35%|███▌      | 1058/3000 [11:00<17:22,  1.86it/s, v_num=1, train_loss_step=1.85e+6, train_loss_epoch=1.79e+6]Epoch 1058/3000:  35%|███▌      | 1058/3000 [11:00<17:22,  1.86it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.79e+6]Epoch 1059/3000:  35%|███▌      | 1058/3000 [11:00<17:22,  1.86it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.79e+6]Epoch 1059/3000:  35%|███▌      | 1059/3000 [11:01<16:31,  1.96it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.79e+6]Epoch 1059/3000:  35%|███▌      | 1059/3000 [11:01<16:31,  1.96it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.79e+6]Epoch 1060/3000:  35%|███▌      | 1059/3000 [11:01<16:31,  1.96it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.79e+6]Epoch 1060/3000:  35%|███▌      | 1060/3000 [11:01<17:06,  1.89it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.79e+6]Epoch 1060/3000:  35%|███▌      | 1060/3000 [11:01<17:06,  1.89it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.79e+6]Epoch 1061/3000:  35%|███▌      | 1060/3000 [11:01<17:06,  1.89it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.79e+6]Epoch 1061/3000:  35%|███▌      | 1061/3000 [11:02<16:20,  1.98it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.79e+6]Epoch 1061/3000:  35%|███▌      | 1061/3000 [11:02<16:20,  1.98it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.79e+6]Epoch 1062/3000:  35%|███▌      | 1061/3000 
[11:02<16:20,  1.98it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.79e+6]Epoch 1062/3000:  35%|███▌      | 1062/3000 [11:02<15:56,  2.03it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.79e+6]Epoch 1062/3000:  35%|███▌      | 1062/3000 [11:02<15:56,  2.03it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.79e+6]Epoch 1063/3000:  35%|███▌      | 1062/3000 [11:02<15:56,  2.03it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.79e+6]Epoch 1063/3000:  35%|███▌      | 1063/3000 [11:03<16:13,  1.99it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.79e+6]Epoch 1063/3000:  35%|███▌      | 1063/3000 [11:03<16:13,  1.99it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.79e+6]Epoch 1064/3000:  35%|███▌      | 1063/3000 [11:03<16:13,  1.99it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.79e+6]Epoch 1064/3000:  35%|███▌      | 1064/3000 [11:03<16:24,  1.97it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.79e+6]Epoch 1064/3000:  35%|███▌      | 1064/3000 [11:03<16:24,  1.97it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.79e+6]Epoch 1065/3000:  35%|███▌      | 1064/3000 [11:03<16:24,  1.97it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.79e+6]Epoch 1065/3000:  36%|███▌      | 1065/3000 [11:04<16:28,  1.96it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.79e+6]Epoch 1065/3000:  36%|███▌      | 1065/3000 [11:04<16:28,  1.96it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.79e+6]Epoch 1066/3000:  36%|███▌      | 1065/3000 [11:04<16:28,  1.96it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.79e+6]Epoch 1066/3000:  36%|███▌      | 1066/3000 [11:04<16:51,  1.91it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.79e+6]Epoch 1066/3000:  36%|███▌      | 1066/3000 [11:04<16:51,  1.91it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.79e+6]Epoch 1067/3000:  36%|███▌      | 1066/3000 [11:04<16:51,  1.91it/s, v_num=1, train_loss_step=1.72e+6, 
train_loss_epoch=1.79e+6]Epoch 1067/3000:  36%|███▌      | 1067/3000 [11:05<16:40,  1.93it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.79e+6]Epoch 1067/3000:  36%|███▌      | 1067/3000 [11:05<16:40,  1.93it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.79e+6]Epoch 1068/3000:  36%|███▌      | 1067/3000 [11:05<16:40,  1.93it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.79e+6]Epoch 1068/3000:  36%|███▌      | 1068/3000 [11:05<16:15,  1.98it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.79e+6]Epoch 1068/3000:  36%|███▌      | 1068/3000 [11:05<16:15,  1.98it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.79e+6]Epoch 1069/3000:  36%|███▌      | 1068/3000 [11:05<16:15,  1.98it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.79e+6]Epoch 1069/3000:  36%|███▌      | 1069/3000 [11:06<15:59,  2.01it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.79e+6]Epoch 1069/3000:  36%|███▌      | 1069/3000 [11:06<15:59,  2.01it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.79e+6]Epoch 1070/3000:  36%|███▌      | 1069/3000 [11:06<15:59,  2.01it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.79e+6]Epoch 1070/3000:  36%|███▌      | 1070/3000 [11:06<17:13,  1.87it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.79e+6]Epoch 1070/3000:  36%|███▌      | 1070/3000 [11:06<17:13,  1.87it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=1.78e+6]Epoch 1071/3000:  36%|███▌      | 1070/3000 [11:06<17:13,  1.87it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=1.78e+6]Epoch 1071/3000:  36%|███▌      | 1071/3000 [11:07<18:50,  1.71it/s, v_num=1, train_loss_step=1.89e+6, train_loss_epoch=1.78e+6]Epoch 1071/3000:  36%|███▌      | 1071/3000 [11:07<18:50,  1.71it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.78e+6]Epoch 1072/3000:  36%|███▌      | 1071/3000 [11:07<18:50,  1.71it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.78e+6]Epoch 1072/3000:  36%|███▌      | 1072/3000 
[11:08<18:21,  1.75it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.78e+6]Epoch 1072/3000:  36%|███▌      | 1072/3000 [11:08<18:21,  1.75it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.78e+6] Epoch 1073/3000:  36%|███▌      | 1072/3000 [11:08<18:21,  1.75it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.78e+6]Epoch 1073/3000:  36%|███▌      | 1073/3000 [11:08<17:24,  1.84it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.78e+6]Epoch 1073/3000:  36%|███▌      | 1073/3000 [11:08<17:24,  1.84it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.78e+6]Epoch 1074/3000:  36%|███▌      | 1073/3000 [11:08<17:24,  1.84it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.78e+6]Epoch 1074/3000:  36%|███▌      | 1074/3000 [11:09<17:20,  1.85it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.78e+6]Epoch 1074/3000:  36%|███▌      | 1074/3000 [11:09<17:20,  1.85it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.78e+6]Epoch 1075/3000:  36%|███▌      | 1074/3000 [11:09<17:20,  1.85it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.78e+6]Epoch 1075/3000:  36%|███▌      | 1075/3000 [11:09<16:36,  1.93it/s, v_num=1, train_loss_step=1.83e+6, train_loss_epoch=1.78e+6]Epoch 1075/3000:  36%|███▌      | 1075/3000 [11:09<16:36,  1.93it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.78e+6]Epoch 1076/3000:  36%|███▌      | 1075/3000 [11:09<16:36,  1.93it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.78e+6]Epoch 1076/3000:  36%|███▌      | 1076/3000 [11:10<16:38,  1.93it/s, v_num=1, train_loss_step=1.84e+6, train_loss_epoch=1.78e+6]Epoch 1076/3000:  36%|███▌      | 1076/3000 [11:10<16:38,  1.93it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.78e+6]Epoch 1077/3000:  36%|███▌      | 1076/3000 [11:10<16:38,  1.93it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.78e+6]Epoch 1077/3000:  36%|███▌      | 1077/3000 [11:10<16:37,  1.93it/s, v_num=1, train_loss_step=1.82e+6, 
train_loss_epoch=1.78e+6]Epoch 1077/3000:  36%|███▌      | 1077/3000 [11:10<16:37,  1.93it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.78e+6]Epoch 1078/3000:  36%|███▌      | 1077/3000 [11:10<16:37,  1.93it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.78e+6]Epoch 1078/3000:  36%|███▌      | 1078/3000 [11:11<16:53,  1.90it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.78e+6]Epoch 1078/3000:  36%|███▌      | 1078/3000 [11:11<16:53,  1.90it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.78e+6]Epoch 1079/3000:  36%|███▌      | 1078/3000 [11:11<16:53,  1.90it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.78e+6]Epoch 1079/3000:  36%|███▌      | 1079/3000 [11:11<16:32,  1.94it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.78e+6]Epoch 1079/3000:  36%|███▌      | 1079/3000 [11:11<16:32,  1.94it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.78e+6]Epoch 1080/3000:  36%|███▌      | 1079/3000 [11:11<16:32,  1.94it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.78e+6]Epoch 1080/3000:  36%|███▌      | 1080/3000 [11:12<16:43,  1.91it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.78e+6]Epoch 1080/3000:  36%|███▌      | 1080/3000 [11:12<16:43,  1.91it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.78e+6]Epoch 1081/3000:  36%|███▌      | 1080/3000 [11:12<16:43,  1.91it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.78e+6]Epoch 1081/3000:  36%|███▌      | 1081/3000 [11:12<16:41,  1.92it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.78e+6]Epoch 1081/3000:  36%|███▌      | 1081/3000 [11:12<16:41,  1.92it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.78e+6]Epoch 1082/3000:  36%|███▌      | 1081/3000 [11:12<16:41,  1.92it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.78e+6]Epoch 1082/3000:  36%|███▌      | 1082/3000 [11:13<16:25,  1.95it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.78e+6]Epoch 1082/3000:  36%|███▌      | 1082/3000 
[11:13<16:25,  1.95it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.78e+6]Epoch 1083/3000:  36%|███▌      | 1082/3000 [11:13<16:25,  1.95it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.78e+6]Epoch 1083/3000:  36%|███▌      | 1083/3000 [11:13<15:20,  2.08it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.78e+6]Epoch 1083/3000:  36%|███▌      | 1083/3000 [11:13<15:20,  2.08it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.78e+6]Epoch 1084/3000:  36%|███▌      | 1083/3000 [11:13<15:20,  2.08it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.78e+6]Epoch 1084/3000:  36%|███▌      | 1084/3000 [11:14<15:04,  2.12it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.78e+6]Epoch 1084/3000:  36%|███▌      | 1084/3000 [11:14<15:04,  2.12it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.77e+6]Epoch 1085/3000:  36%|███▌      | 1084/3000 [11:14<15:04,  2.12it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.77e+6]Epoch 1085/3000:  36%|███▌      | 1085/3000 [11:14<14:48,  2.16it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.77e+6]Epoch 1085/3000:  36%|███▌      | 1085/3000 [11:14<14:48,  2.16it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.77e+6]Epoch 1086/3000:  36%|███▌      | 1085/3000 [11:14<14:48,  2.16it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.77e+6]Epoch 1086/3000:  36%|███▌      | 1086/3000 [11:15<16:11,  1.97it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.77e+6]Epoch 1086/3000:  36%|███▌      | 1086/3000 [11:15<16:11,  1.97it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.77e+6]Epoch 1087/3000:  36%|███▌      | 1086/3000 [11:15<16:11,  1.97it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.77e+6]Epoch 1087/3000:  36%|███▌      | 1087/3000 [11:15<15:14,  2.09it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.77e+6]Epoch 1087/3000:  36%|███▌      | 1087/3000 [11:15<15:14,  2.09it/s, v_num=1, train_loss_step=1.78e+6, 
train_loss_epoch=1.77e+6]Epoch 1088/3000:  36%|███▌      | 1087/3000 [11:15<15:14,  2.09it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.77e+6]Epoch 1088/3000:  36%|███▋      | 1088/3000 [11:16<14:54,  2.14it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.77e+6]Epoch 1088/3000:  36%|███▋      | 1088/3000 [11:16<14:54,  2.14it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.77e+6]Epoch 1089/3000:  36%|███▋      | 1088/3000 [11:16<14:54,  2.14it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.77e+6]Epoch 1089/3000:  36%|███▋      | 1089/3000 [11:16<14:45,  2.16it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.77e+6]Epoch 1089/3000:  36%|███▋      | 1089/3000 [11:16<14:45,  2.16it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.77e+6]Epoch 1090/3000:  36%|███▋      | 1089/3000 [11:16<14:45,  2.16it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.77e+6]Epoch 1090/3000:  36%|███▋      | 1090/3000 [11:16<14:23,  2.21it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.77e+6]Epoch 1090/3000:  36%|███▋      | 1090/3000 [11:16<14:23,  2.21it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.77e+6]Epoch 1091/3000:  36%|███▋      | 1090/3000 [11:16<14:23,  2.21it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.77e+6]Epoch 1091/3000:  36%|███▋      | 1091/3000 [11:17<14:59,  2.12it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.77e+6]Epoch 1091/3000:  36%|███▋      | 1091/3000 [11:17<14:59,  2.12it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.77e+6]Epoch 1092/3000:  36%|███▋      | 1091/3000 [11:17<14:59,  2.12it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.77e+6]Epoch 1092/3000:  36%|███▋      | 1092/3000 [11:17<14:41,  2.16it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.77e+6]Epoch 1092/3000:  36%|███▋      | 1092/3000 [11:17<14:41,  2.16it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.77e+6]Epoch 1093/3000:  36%|███▋      | 1092/3000 
[11:17<14:41,  2.16it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.77e+6]Epoch 1093/3000:  36%|███▋      | 1093/3000 [11:18<15:16,  2.08it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.77e+6]Epoch 1093/3000:  36%|███▋      | 1093/3000 [11:18<15:16,  2.08it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.77e+6]Epoch 1094/3000:  36%|███▋      | 1093/3000 [11:18<15:16,  2.08it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.77e+6]Epoch 1094/3000:  36%|███▋      | 1094/3000 [11:18<15:09,  2.10it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.77e+6]Epoch 1094/3000:  36%|███▋      | 1094/3000 [11:18<15:09,  2.10it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.77e+6]Epoch 1095/3000:  36%|███▋      | 1094/3000 [11:18<15:09,  2.10it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.77e+6]Epoch 1095/3000:  36%|███▋      | 1095/3000 [11:19<14:42,  2.16it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.77e+6]Epoch 1095/3000:  36%|███▋      | 1095/3000 [11:19<14:42,  2.16it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.77e+6]Epoch 1096/3000:  36%|███▋      | 1095/3000 [11:19<14:42,  2.16it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.77e+6]Epoch 1096/3000:  37%|███▋      | 1096/3000 [11:19<15:01,  2.11it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.77e+6]Epoch 1096/3000:  37%|███▋      | 1096/3000 [11:19<15:01,  2.11it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.77e+6]Epoch 1097/3000:  37%|███▋      | 1096/3000 [11:19<15:01,  2.11it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.77e+6]Epoch 1097/3000:  37%|███▋      | 1097/3000 [11:20<15:03,  2.11it/s, v_num=1, train_loss_step=1.86e+6, train_loss_epoch=1.77e+6]Epoch 1097/3000:  37%|███▋      | 1097/3000 [11:20<15:03,  2.11it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.77e+6]Epoch 1098/3000:  37%|███▋      | 1097/3000 [11:20<15:03,  2.11it/s, v_num=1, train_loss_step=1.74e+6, 
train_loss_epoch=1.77e+6]Epoch 1098/3000:  37%|███▋      | 1098/3000 [11:20<14:39,  2.16it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.77e+6]Epoch 1098/3000:  37%|███▋      | 1098/3000 [11:20<14:39,  2.16it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.77e+6]Epoch 1099/3000:  37%|███▋      | 1098/3000 [11:20<14:39,  2.16it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.77e+6]Epoch 1099/3000:  37%|███▋      | 1099/3000 [11:21<15:55,  1.99it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.77e+6]Epoch 1099/3000:  37%|███▋      | 1099/3000 [11:21<15:55,  1.99it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.76e+6]Epoch 1100/3000:  37%|███▋      | 1099/3000 [11:21<15:55,  1.99it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.76e+6]Epoch 1100/3000:  37%|███▋      | 1100/3000 [11:21<15:53,  1.99it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.76e+6]Epoch 1100/3000:  37%|███▋      | 1100/3000 [11:21<15:53,  1.99it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.76e+6]Epoch 1101/3000:  37%|███▋      | 1100/3000 [11:21<15:53,  1.99it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.76e+6]Epoch 1101/3000:  37%|███▋      | 1101/3000 [11:22<15:31,  2.04it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.76e+6]Epoch 1101/3000:  37%|███▋      | 1101/3000 [11:22<15:31,  2.04it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.76e+6]Epoch 1102/3000:  37%|███▋      | 1101/3000 [11:22<15:31,  2.04it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.76e+6]Epoch 1102/3000:  37%|███▋      | 1102/3000 [11:22<15:38,  2.02it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.76e+6]Epoch 1102/3000:  37%|███▋      | 1102/3000 [11:22<15:38,  2.02it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.76e+6]Epoch 1103/3000:  37%|███▋      | 1102/3000 [11:22<15:38,  2.02it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.76e+6]Epoch 1103/3000:  37%|███▋      | 1103/3000 
[11:23<15:28,  2.04it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.76e+6]Epoch 1103/3000:  37%|███▋      | 1103/3000 [11:23<15:28,  2.04it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.76e+6]Epoch 1104/3000:  37%|███▋      | 1103/3000 [11:23<15:28,  2.04it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.76e+6]Epoch 1104/3000:  37%|███▋      | 1104/3000 [11:23<15:53,  1.99it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.76e+6]Epoch 1104/3000:  37%|███▋      | 1104/3000 [11:23<15:53,  1.99it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.76e+6]Epoch 1105/3000:  37%|███▋      | 1104/3000 [11:23<15:53,  1.99it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.76e+6]Epoch 1105/3000:  37%|███▋      | 1105/3000 [11:24<15:26,  2.05it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.76e+6]Epoch 1105/3000:  37%|███▋      | 1105/3000 [11:24<15:26,  2.05it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.76e+6]Epoch 1106/3000:  37%|███▋      | 1105/3000 [11:24<15:26,  2.05it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.76e+6]Epoch 1106/3000:  37%|███▋      | 1106/3000 [11:24<14:42,  2.15it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.76e+6]Epoch 1106/3000:  37%|███▋      | 1106/3000 [11:24<14:42,  2.15it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.76e+6]Epoch 1107/3000:  37%|███▋      | 1106/3000 [11:24<14:42,  2.15it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.76e+6]Epoch 1107/3000:  37%|███▋      | 1107/3000 [11:25<14:21,  2.20it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.76e+6]Epoch 1107/3000:  37%|███▋      | 1107/3000 [11:25<14:21,  2.20it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.76e+6]Epoch 1108/3000:  37%|███▋      | 1107/3000 [11:25<14:21,  2.20it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.76e+6]Epoch 1108/3000:  37%|███▋      | 1108/3000 [11:25<14:10,  2.22it/s, v_num=1, train_loss_step=1.74e+6, 
train_loss_epoch=1.76e+6]Epoch 1108/3000:  37%|███▋      | 1108/3000 [11:25<14:10,  2.22it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.76e+6]Epoch 1109/3000:  37%|███▋      | 1108/3000 [11:25<14:10,  2.22it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.76e+6]Epoch 1109/3000:  37%|███▋      | 1109/3000 [11:26<14:57,  2.11it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.76e+6]Epoch 1109/3000:  37%|███▋      | 1109/3000 [11:26<14:57,  2.11it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.76e+6] Epoch 1110/3000:  37%|███▋      | 1109/3000 [11:26<14:57,  2.11it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.76e+6]Epoch 1110/3000:  37%|███▋      | 1110/3000 [11:26<14:59,  2.10it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.76e+6]Epoch 1110/3000:  37%|███▋      | 1110/3000 [11:26<14:59,  2.10it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.76e+6]Epoch 1111/3000:  37%|███▋      | 1110/3000 [11:26<14:59,  2.10it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.76e+6]Epoch 1111/3000:  37%|███▋      | 1111/3000 [11:26<14:11,  2.22it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.76e+6]Epoch 1111/3000:  37%|███▋      | 1111/3000 [11:26<14:11,  2.22it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.76e+6]Epoch 1112/3000:  37%|███▋      | 1111/3000 [11:26<14:11,  2.22it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.76e+6]Epoch 1112/3000:  37%|███▋      | 1112/3000 [11:27<14:07,  2.23it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.76e+6]Epoch 1112/3000:  37%|███▋      | 1112/3000 [11:27<14:07,  2.23it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.76e+6]Epoch 1113/3000:  37%|███▋      | 1112/3000 [11:27<14:07,  2.23it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.76e+6]Epoch 1113/3000:  37%|███▋      | 1113/3000 [11:27<13:58,  2.25it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.76e+6]Epoch 1113/3000:  37%|███▋      | 1113/3000 
[11:27<13:58,  2.25it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.76e+6]Epoch 1114/3000:  37%|███▋      | 1113/3000 [11:27<13:58,  2.25it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.76e+6]Epoch 1114/3000:  37%|███▋      | 1114/3000 [11:28<14:34,  2.16it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.76e+6]Epoch 1114/3000:  37%|███▋      | 1114/3000 [11:28<14:34,  2.16it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.75e+6]Epoch 1115/3000:  37%|███▋      | 1114/3000 [11:28<14:34,  2.16it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.75e+6]Epoch 1115/3000:  37%|███▋      | 1115/3000 [11:28<14:26,  2.17it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.75e+6]Epoch 1115/3000:  37%|███▋      | 1115/3000 [11:28<14:26,  2.17it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.75e+6] Epoch 1116/3000:  37%|███▋      | 1115/3000 [11:28<14:26,  2.17it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.75e+6]Epoch 1116/3000:  37%|███▋      | 1116/3000 [11:29<13:44,  2.29it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.75e+6]Epoch 1116/3000:  37%|███▋      | 1116/3000 [11:29<13:44,  2.29it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.75e+6]Epoch 1117/3000:  37%|███▋      | 1116/3000 [11:29<13:44,  2.29it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.75e+6]Epoch 1117/3000:  37%|███▋      | 1117/3000 [11:29<13:03,  2.40it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.75e+6]Epoch 1117/3000:  37%|███▋      | 1117/3000 [11:29<13:03,  2.40it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.75e+6]Epoch 1118/3000:  37%|███▋      | 1117/3000 [11:29<13:03,  2.40it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.75e+6]Epoch 1118/3000:  37%|███▋      | 1118/3000 [11:29<13:29,  2.32it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.75e+6]Epoch 1118/3000:  37%|███▋      | 1118/3000 [11:29<13:29,  2.32it/s, v_num=1, train_loss_step=1.82e+6, 
train_loss_epoch=1.75e+6]Epoch 1119/3000:  37%|███▋      | 1118/3000 [11:29<13:29,  2.32it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.75e+6]Epoch 1119/3000:  37%|███▋      | 1119/3000 [11:30<13:35,  2.31it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.75e+6]Epoch 1119/3000:  37%|███▋      | 1119/3000 [11:30<13:35,  2.31it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.75e+6]Epoch 1120/3000:  37%|███▋      | 1119/3000 [11:30<13:35,  2.31it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.75e+6]Epoch 1120/3000:  37%|███▋      | 1120/3000 [11:30<13:41,  2.29it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.75e+6]Epoch 1120/3000:  37%|███▋      | 1120/3000 [11:30<13:41,  2.29it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.75e+6]Epoch 1121/3000:  37%|███▋      | 1120/3000 [11:30<13:41,  2.29it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.75e+6]Epoch 1121/3000:  37%|███▋      | 1121/3000 [11:31<13:23,  2.34it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.75e+6]Epoch 1121/3000:  37%|███▋      | 1121/3000 [11:31<13:23,  2.34it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.75e+6]Epoch 1122/3000:  37%|███▋      | 1121/3000 [11:31<13:23,  2.34it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.75e+6]Epoch 1122/3000:  37%|███▋      | 1122/3000 [11:31<13:55,  2.25it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.75e+6]Epoch 1122/3000:  37%|███▋      | 1122/3000 [11:31<13:55,  2.25it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.75e+6]Epoch 1123/3000:  37%|███▋      | 1122/3000 [11:31<13:55,  2.25it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.75e+6]Epoch 1123/3000:  37%|███▋      | 1123/3000 [11:32<13:12,  2.37it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.75e+6]Epoch 1123/3000:  37%|███▋      | 1123/3000 [11:32<13:12,  2.37it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.75e+6]Epoch 1124/3000:  37%|███▋      | 1123/3000 
[11:32<13:12,  2.37it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.75e+6]Epoch 1124/3000:  37%|███▋      | 1124/3000 [11:32<13:43,  2.28it/s, v_num=1, train_loss_step=1.82e+6, train_loss_epoch=1.75e+6]Epoch 1124/3000:  37%|███▋      | 1124/3000 [11:32<13:43,  2.28it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.75e+6]Epoch 1125/3000:  37%|███▋      | 1124/3000 [11:32<13:43,  2.28it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.75e+6]Epoch 1125/3000:  38%|███▊      | 1125/3000 [11:33<13:42,  2.28it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.75e+6]Epoch 1125/3000:  38%|███▊      | 1125/3000 [11:33<13:42,  2.28it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.75e+6]Epoch 1126/3000:  38%|███▊      | 1125/3000 [11:33<13:42,  2.28it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.75e+6]Epoch 1126/3000:  38%|███▊      | 1126/3000 [11:33<14:06,  2.21it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.75e+6]Epoch 1126/3000:  38%|███▊      | 1126/3000 [11:33<14:06,  2.21it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.75e+6]Epoch 1127/3000:  38%|███▊      | 1126/3000 [11:33<14:06,  2.21it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.75e+6]Epoch 1127/3000:  38%|███▊      | 1127/3000 [11:33<13:25,  2.32it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.75e+6]Epoch 1127/3000:  38%|███▊      | 1127/3000 [11:33<13:25,  2.32it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.75e+6]Epoch 1128/3000:  38%|███▊      | 1127/3000 [11:33<13:25,  2.32it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.75e+6]Epoch 1128/3000:  38%|███▊      | 1128/3000 [11:34<13:35,  2.30it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.75e+6]Epoch 1128/3000:  38%|███▊      | 1128/3000 [11:34<13:35,  2.30it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.75e+6]Epoch 1129/3000:  38%|███▊      | 1128/3000 [11:34<13:35,  2.30it/s, v_num=1, train_loss_step=1.71e+6, 
train_loss_epoch=1.75e+6]Epoch 1129/3000:  38%|███▊      | 1129/3000 [11:34<13:28,  2.31it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.75e+6]Epoch 1129/3000:  38%|███▊      | 1129/3000 [11:34<13:28,  2.31it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.75e+6]Epoch 1130/3000:  38%|███▊      | 1129/3000 [11:34<13:28,  2.31it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.75e+6]Epoch 1130/3000:  38%|███▊      | 1130/3000 [11:35<13:17,  2.34it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.75e+6]Epoch 1130/3000:  38%|███▊      | 1130/3000 [11:35<13:17,  2.34it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.74e+6]Epoch 1131/3000:  38%|███▊      | 1130/3000 [11:35<13:17,  2.34it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.74e+6]Epoch 1131/3000:  38%|███▊      | 1131/3000 [11:35<13:19,  2.34it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.74e+6]Epoch 1131/3000:  38%|███▊      | 1131/3000 [11:35<13:19,  2.34it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.74e+6]Epoch 1132/3000:  38%|███▊      | 1131/3000 [11:35<13:19,  2.34it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.74e+6]Epoch 1132/3000:  38%|███▊      | 1132/3000 [11:36<13:17,  2.34it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.74e+6]Epoch 1132/3000:  38%|███▊      | 1132/3000 [11:36<13:17,  2.34it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.74e+6]Epoch 1133/3000:  38%|███▊      | 1132/3000 [11:36<13:17,  2.34it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.74e+6]Epoch 1133/3000:  38%|███▊      | 1133/3000 [11:36<13:03,  2.38it/s, v_num=1, train_loss_step=1.81e+6, train_loss_epoch=1.74e+6]Epoch 1133/3000:  38%|███▊      | 1133/3000 [11:36<13:03,  2.38it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.74e+6]Epoch 1134/3000:  38%|███▊      | 1133/3000 [11:36<13:03,  2.38it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.74e+6]Epoch 1134/3000:  38%|███▊      | 1134/3000 
[11:36<12:53,  2.41it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.74e+6]Epoch 1134/3000:  38%|███▊      | 1134/3000 [11:36<12:53,  2.41it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.74e+6]Epoch 1135/3000:  38%|███▊      | 1134/3000 [11:36<12:53,  2.41it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.74e+6]Epoch 1135/3000:  38%|███▊      | 1135/3000 [11:37<13:20,  2.33it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.74e+6]Epoch 1135/3000:  38%|███▊      | 1135/3000 [11:37<13:20,  2.33it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.74e+6]Epoch 1136/3000:  38%|███▊      | 1135/3000 [11:37<13:20,  2.33it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.74e+6]Epoch 1136/3000:  38%|███▊      | 1136/3000 [11:37<13:30,  2.30it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.74e+6]Epoch 1136/3000:  38%|███▊      | 1136/3000 [11:37<13:30,  2.30it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.74e+6]Epoch 1137/3000:  38%|███▊      | 1136/3000 [11:37<13:30,  2.30it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.74e+6]Epoch 1137/3000:  38%|███▊      | 1137/3000 [11:38<14:11,  2.19it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.74e+6]Epoch 1137/3000:  38%|███▊      | 1137/3000 [11:38<14:11,  2.19it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.74e+6]Epoch 1138/3000:  38%|███▊      | 1137/3000 [11:38<14:11,  2.19it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.74e+6]Epoch 1138/3000:  38%|███▊      | 1138/3000 [11:38<14:36,  2.12it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.74e+6]Epoch 1138/3000:  38%|███▊      | 1138/3000 [11:38<14:36,  2.12it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.74e+6]Epoch 1139/3000:  38%|███▊      | 1138/3000 [11:38<14:36,  2.12it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.74e+6]Epoch 1139/3000:  38%|███▊      | 1139/3000 [11:39<14:45,  2.10it/s, v_num=1, train_loss_step=1.76e+6, 
train_loss_epoch=1.74e+6]Epoch 1139/3000:  38%|███▊      | 1139/3000 [11:39<14:45,  2.10it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.74e+6]Epoch 1140/3000:  38%|███▊      | 1139/3000 [11:39<14:45,  2.10it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.74e+6]Epoch 1140/3000:  38%|███▊      | 1140/3000 [11:39<14:18,  2.17it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.74e+6]Epoch 1140/3000:  38%|███▊      | 1140/3000 [11:39<14:18,  2.17it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.74e+6]Epoch 1141/3000:  38%|███▊      | 1140/3000 [11:39<14:18,  2.17it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.74e+6]Epoch 1141/3000:  38%|███▊      | 1141/3000 [11:40<14:25,  2.15it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.74e+6]Epoch 1141/3000:  38%|███▊      | 1141/3000 [11:40<14:25,  2.15it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.74e+6]Epoch 1142/3000:  38%|███▊      | 1141/3000 [11:40<14:25,  2.15it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.74e+6]Epoch 1142/3000:  38%|███▊      | 1142/3000 [11:40<12:58,  2.39it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.74e+6]Epoch 1142/3000:  38%|███▊      | 1142/3000 [11:40<12:58,  2.39it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.74e+6]Epoch 1143/3000:  38%|███▊      | 1142/3000 [11:40<12:58,  2.39it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.74e+6]Epoch 1143/3000:  38%|███▊      | 1143/3000 [11:40<13:46,  2.25it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.74e+6]Epoch 1143/3000:  38%|███▊      | 1143/3000 [11:40<13:46,  2.25it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.74e+6]Epoch 1144/3000:  38%|███▊      | 1143/3000 [11:41<13:46,  2.25it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.74e+6]Epoch 1144/3000:  38%|███▊      | 1144/3000 [11:41<13:05,  2.36it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.74e+6]Epoch 1144/3000:  38%|███▊      | 1144/3000 
[11:41<13:05,  2.36it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.74e+6]Epoch 1145/3000:  38%|███▊      | 1144/3000 [11:41<13:05,  2.36it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.74e+6]Epoch 1145/3000:  38%|███▊      | 1145/3000 [11:41<13:41,  2.26it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.74e+6]Epoch 1145/3000:  38%|███▊      | 1145/3000 [11:41<13:41,  2.26it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.74e+6]Epoch 1146/3000:  38%|███▊      | 1145/3000 [11:41<13:41,  2.26it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.74e+6]Epoch 1146/3000:  38%|███▊      | 1146/3000 [11:42<14:01,  2.20it/s, v_num=1, train_loss_step=1.79e+6, train_loss_epoch=1.74e+6]Epoch 1146/3000:  38%|███▊      | 1146/3000 [11:42<14:01,  2.20it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.73e+6]Epoch 1147/3000:  38%|███▊      | 1146/3000 [11:42<14:01,  2.20it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.73e+6]Epoch 1147/3000:  38%|███▊      | 1147/3000 [11:42<13:02,  2.37it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.73e+6]Epoch 1147/3000:  38%|███▊      | 1147/3000 [11:42<13:02,  2.37it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.73e+6]Epoch 1148/3000:  38%|███▊      | 1147/3000 [11:42<13:02,  2.37it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.73e+6]Epoch 1148/3000:  38%|███▊      | 1148/3000 [11:43<12:35,  2.45it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.73e+6]Epoch 1148/3000:  38%|███▊      | 1148/3000 [11:43<12:35,  2.45it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.73e+6]Epoch 1149/3000:  38%|███▊      | 1148/3000 [11:43<12:35,  2.45it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.73e+6]Epoch 1149/3000:  38%|███▊      | 1149/3000 [11:43<12:48,  2.41it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.73e+6]Epoch 1149/3000:  38%|███▊      | 1149/3000 [11:43<12:48,  2.41it/s, v_num=1, train_loss_step=1.72e+6, 
train_loss_epoch=1.73e+6]Epoch 1150/3000:  38%|███▊      | 1149/3000 [11:43<12:48,  2.41it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.73e+6]Epoch 1150/3000:  38%|███▊      | 1150/3000 [11:43<13:22,  2.31it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.73e+6]Epoch 1150/3000:  38%|███▊      | 1150/3000 [11:43<13:22,  2.31it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.73e+6]Epoch 1151/3000:  38%|███▊      | 1150/3000 [11:43<13:22,  2.31it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.73e+6]Epoch 1151/3000:  38%|███▊      | 1151/3000 [11:44<13:32,  2.27it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.73e+6]Epoch 1151/3000:  38%|███▊      | 1151/3000 [11:44<13:32,  2.27it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.73e+6]Epoch 1152/3000:  38%|███▊      | 1151/3000 [11:44<13:32,  2.27it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.73e+6]Epoch 1152/3000:  38%|███▊      | 1152/3000 [11:44<13:20,  2.31it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.73e+6]Epoch 1152/3000:  38%|███▊      | 1152/3000 [11:44<13:20,  2.31it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.73e+6]Epoch 1153/3000:  38%|███▊      | 1152/3000 [11:44<13:20,  2.31it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.73e+6]Epoch 1153/3000:  38%|███▊      | 1153/3000 [11:45<12:56,  2.38it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.73e+6]Epoch 1153/3000:  38%|███▊      | 1153/3000 [11:45<12:56,  2.38it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.73e+6]Epoch 1154/3000:  38%|███▊      | 1153/3000 [11:45<12:56,  2.38it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.73e+6]Epoch 1154/3000:  38%|███▊      | 1154/3000 [11:45<12:35,  2.44it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.73e+6]Epoch 1154/3000:  38%|███▊      | 1154/3000 [11:45<12:35,  2.44it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.73e+6]Epoch 1155/3000:  38%|███▊      | 1154/3000 
[11:45<12:35,  2.44it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.73e+6]Epoch 1155/3000:  38%|███▊      | 1155/3000 [11:46<12:38,  2.43it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.73e+6]Epoch 1155/3000:  38%|███▊      | 1155/3000 [11:46<12:38,  2.43it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.73e+6]Epoch 1156/3000:  38%|███▊      | 1155/3000 [11:46<12:38,  2.43it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.73e+6]Epoch 1156/3000:  39%|███▊      | 1156/3000 [11:46<13:14,  2.32it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.73e+6]Epoch 1156/3000:  39%|███▊      | 1156/3000 [11:46<13:14,  2.32it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.73e+6]Epoch 1157/3000:  39%|███▊      | 1156/3000 [11:46<13:14,  2.32it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.73e+6]Epoch 1157/3000:  39%|███▊      | 1157/3000 [11:46<12:44,  2.41it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.73e+6]Epoch 1157/3000:  39%|███▊      | 1157/3000 [11:46<12:44,  2.41it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.73e+6]Epoch 1158/3000:  39%|███▊      | 1157/3000 [11:46<12:44,  2.41it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.73e+6]Epoch 1158/3000:  39%|███▊      | 1158/3000 [11:47<12:27,  2.46it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.73e+6]Epoch 1158/3000:  39%|███▊      | 1158/3000 [11:47<12:27,  2.46it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.73e+6]Epoch 1159/3000:  39%|███▊      | 1158/3000 [11:47<12:27,  2.46it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.73e+6]Epoch 1159/3000:  39%|███▊      | 1159/3000 [11:47<12:27,  2.46it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.73e+6]Epoch 1159/3000:  39%|███▊      | 1159/3000 [11:47<12:27,  2.46it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.73e+6]Epoch 1160/3000:  39%|███▊      | 1159/3000 [11:47<12:27,  2.46it/s, v_num=1, train_loss_step=1.75e+6, 
train_loss_epoch=1.73e+6]Epoch 1160/3000:  39%|███▊      | 1160/3000 [11:48<12:40,  2.42it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.73e+6]Epoch 1160/3000:  39%|███▊      | 1160/3000 [11:48<12:40,  2.42it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.73e+6]Epoch 1161/3000:  39%|███▊      | 1160/3000 [11:48<12:40,  2.42it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.73e+6]Epoch 1161/3000:  39%|███▊      | 1161/3000 [11:48<13:38,  2.25it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.73e+6]Epoch 1161/3000:  39%|███▊      | 1161/3000 [11:48<13:38,  2.25it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.73e+6] Epoch 1162/3000:  39%|███▊      | 1161/3000 [11:48<13:38,  2.25it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.73e+6]Epoch 1162/3000:  39%|███▊      | 1162/3000 [11:49<13:51,  2.21it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.73e+6]Epoch 1162/3000:  39%|███▊      | 1162/3000 [11:49<13:51,  2.21it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.73e+6]Epoch 1163/3000:  39%|███▊      | 1162/3000 [11:49<13:51,  2.21it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.73e+6]Epoch 1163/3000:  39%|███▉      | 1163/3000 [11:49<13:25,  2.28it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.73e+6]Epoch 1163/3000:  39%|███▉      | 1163/3000 [11:49<13:25,  2.28it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.72e+6]Epoch 1164/3000:  39%|███▉      | 1163/3000 [11:49<13:25,  2.28it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.72e+6]Epoch 1164/3000:  39%|███▉      | 1164/3000 [11:49<12:54,  2.37it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.72e+6]Epoch 1164/3000:  39%|███▉      | 1164/3000 [11:49<12:54,  2.37it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.72e+6] Epoch 1165/3000:  39%|███▉      | 1164/3000 [11:49<12:54,  2.37it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.72e+6]Epoch 1165/3000:  39%|███▉      | 1165/3000 [11:50<12:54, 
 2.37it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.72e+6]Epoch 1165/3000:  39%|███▉      | 1165/3000 [11:50<12:54,  2.37it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.72e+6]Epoch 1166/3000:  39%|███▉      | 1165/3000 [11:50<12:54,  2.37it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.72e+6]Epoch 1166/3000:  39%|███▉      | 1166/3000 [11:50<13:15,  2.31it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.72e+6]Epoch 1166/3000:  39%|███▉      | 1166/3000 [11:50<13:15,  2.31it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.72e+6]Epoch 1167/3000:  39%|███▉      | 1166/3000 [11:50<13:15,  2.31it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.72e+6]Epoch 1167/3000:  39%|███▉      | 1167/3000 [11:51<13:43,  2.23it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.72e+6]Epoch 1167/3000:  39%|███▉      | 1167/3000 [11:51<13:43,  2.23it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.72e+6]Epoch 1168/3000:  39%|███▉      | 1167/3000 [11:51<13:43,  2.23it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.72e+6]Epoch 1168/3000:  39%|███▉      | 1168/3000 [11:51<14:18,  2.13it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.72e+6]Epoch 1168/3000:  39%|███▉      | 1168/3000 [11:51<14:18,  2.13it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.72e+6]Epoch 1169/3000:  39%|███▉      | 1168/3000 [11:51<14:18,  2.13it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.72e+6]Epoch 1169/3000:  39%|███▉      | 1169/3000 [11:52<14:20,  2.13it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.72e+6]Epoch 1169/3000:  39%|███▉      | 1169/3000 [11:52<14:20,  2.13it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.72e+6]Epoch 1170/3000:  39%|███▉      | 1169/3000 [11:52<14:20,  2.13it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.72e+6]Epoch 1170/3000:  39%|███▉      | 1170/3000 [11:52<13:37,  2.24it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.72e+6]Epoch 
1170/3000:  39%|███▉      | 1170/3000 [11:52<13:37,  2.24it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.72e+6]Epoch 1171/3000:  39%|███▉      | 1170/3000 [11:52<13:37,  2.24it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.72e+6]Epoch 1171/3000:  39%|███▉      | 1171/3000 [11:53<13:12,  2.31it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.72e+6]Epoch 1171/3000:  39%|███▉      | 1171/3000 [11:53<13:12,  2.31it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.72e+6]Epoch 1172/3000:  39%|███▉      | 1171/3000 [11:53<13:12,  2.31it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.72e+6]Epoch 1172/3000:  39%|███▉      | 1172/3000 [11:53<13:25,  2.27it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.72e+6]Epoch 1172/3000:  39%|███▉      | 1172/3000 [11:53<13:25,  2.27it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.72e+6]Epoch 1173/3000:  39%|███▉      | 1172/3000 [11:53<13:25,  2.27it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.72e+6]Epoch 1173/3000:  39%|███▉      | 1173/3000 [11:53<13:19,  2.29it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.72e+6]Epoch 1173/3000:  39%|███▉      | 1173/3000 [11:53<13:19,  2.29it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.72e+6]Epoch 1174/3000:  39%|███▉      | 1173/3000 [11:53<13:19,  2.29it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.72e+6]Epoch 1174/3000:  39%|███▉      | 1174/3000 [11:54<13:50,  2.20it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.72e+6]Epoch 1174/3000:  39%|███▉      | 1174/3000 [11:54<13:50,  2.20it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.72e+6]Epoch 1175/3000:  39%|███▉      | 1174/3000 [11:54<13:50,  2.20it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.72e+6]Epoch 1175/3000:  39%|███▉      | 1175/3000 [11:54<14:23,  2.11it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.72e+6]Epoch 1175/3000:  39%|███▉      | 1175/3000 [11:54<14:23,  2.11it/s, v_num=1, 
train_loss_step=1.7e+6, train_loss_epoch=1.72e+6] Epoch 1176/3000:  39%|███▉      | 1175/3000 [11:54<14:23,  2.11it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.72e+6]Epoch 1176/3000:  39%|███▉      | 1176/3000 [11:55<13:27,  2.26it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.72e+6]Epoch 1176/3000:  39%|███▉      | 1176/3000 [11:55<13:27,  2.26it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.72e+6]Epoch 1177/3000:  39%|███▉      | 1176/3000 [11:55<13:27,  2.26it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.72e+6]Epoch 1177/3000:  39%|███▉      | 1177/3000 [11:55<13:04,  2.32it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.72e+6]Epoch 1177/3000:  39%|███▉      | 1177/3000 [11:55<13:04,  2.32it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.72e+6]Epoch 1178/3000:  39%|███▉      | 1177/3000 [11:55<13:04,  2.32it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.72e+6]Epoch 1178/3000:  39%|███▉      | 1178/3000 [11:56<13:12,  2.30it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.72e+6]Epoch 1178/3000:  39%|███▉      | 1178/3000 [11:56<13:12,  2.30it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.72e+6]Epoch 1179/3000:  39%|███▉      | 1178/3000 [11:56<13:12,  2.30it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.72e+6]Epoch 1179/3000:  39%|███▉      | 1179/3000 [11:56<13:39,  2.22it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.72e+6]Epoch 1179/3000:  39%|███▉      | 1179/3000 [11:56<13:39,  2.22it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.72e+6]Epoch 1180/3000:  39%|███▉      | 1179/3000 [11:56<13:39,  2.22it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.72e+6]Epoch 1180/3000:  39%|███▉      | 1180/3000 [11:57<13:59,  2.17it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.72e+6]Epoch 1180/3000:  39%|███▉      | 1180/3000 [11:57<13:59,  2.17it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.72e+6] Epoch 1181/3000:  39%|███▉      
| 1180/3000 [11:57<13:59,  2.17it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.72e+6]Epoch 1181/3000:  39%|███▉      | 1181/3000 [11:57<13:40,  2.22it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.72e+6]Epoch 1181/3000:  39%|███▉      | 1181/3000 [11:57<13:40,  2.22it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.71e+6]Epoch 1182/3000:  39%|███▉      | 1181/3000 [11:57<13:40,  2.22it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.71e+6]Epoch 1182/3000:  39%|███▉      | 1182/3000 [11:57<13:20,  2.27it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.71e+6]Epoch 1182/3000:  39%|███▉      | 1182/3000 [11:57<13:20,  2.27it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.71e+6]Epoch 1183/3000:  39%|███▉      | 1182/3000 [11:57<13:20,  2.27it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.71e+6]Epoch 1183/3000:  39%|███▉      | 1183/3000 [11:58<12:52,  2.35it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.71e+6]Epoch 1183/3000:  39%|███▉      | 1183/3000 [11:58<12:52,  2.35it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.71e+6]Epoch 1184/3000:  39%|███▉      | 1183/3000 [11:58<12:52,  2.35it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.71e+6]Epoch 1184/3000:  39%|███▉      | 1184/3000 [11:58<11:36,  2.61it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.71e+6]Epoch 1184/3000:  39%|███▉      | 1184/3000 [11:58<11:36,  2.61it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.71e+6]Epoch 1185/3000:  39%|███▉      | 1184/3000 [11:58<11:36,  2.61it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.71e+6]Epoch 1185/3000:  40%|███▉      | 1185/3000 [11:59<11:40,  2.59it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.71e+6]Epoch 1185/3000:  40%|███▉      | 1185/3000 [11:59<11:40,  2.59it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.71e+6]Epoch 1186/3000:  40%|███▉      | 1185/3000 [11:59<11:40,  2.59it/s, v_num=1, train_loss_step=1.72e+6, 
train_loss_epoch=1.71e+6]Epoch 1186/3000:  40%|███▉      | 1186/3000 [11:59<11:55,  2.54it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.71e+6]Epoch 1186/3000:  40%|███▉      | 1186/3000 [11:59<11:55,  2.54it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.71e+6]Epoch 1187/3000:  40%|███▉      | 1186/3000 [11:59<11:55,  2.54it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.71e+6]Epoch 1187/3000:  40%|███▉      | 1187/3000 [11:59<12:32,  2.41it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.71e+6]Epoch 1187/3000:  40%|███▉      | 1187/3000 [11:59<12:32,  2.41it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.71e+6]Epoch 1188/3000:  40%|███▉      | 1187/3000 [11:59<12:32,  2.41it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.71e+6]Epoch 1188/3000:  40%|███▉      | 1188/3000 [12:00<12:34,  2.40it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.71e+6]Epoch 1188/3000:  40%|███▉      | 1188/3000 [12:00<12:34,  2.40it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.71e+6]Epoch 1189/3000:  40%|███▉      | 1188/3000 [12:00<12:34,  2.40it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.71e+6]Epoch 1189/3000:  40%|███▉      | 1189/3000 [12:00<12:53,  2.34it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.71e+6]Epoch 1189/3000:  40%|███▉      | 1189/3000 [12:00<12:53,  2.34it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.71e+6]Epoch 1190/3000:  40%|███▉      | 1189/3000 [12:00<12:53,  2.34it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.71e+6]Epoch 1190/3000:  40%|███▉      | 1190/3000 [12:01<12:48,  2.36it/s, v_num=1, train_loss_step=1.77e+6, train_loss_epoch=1.71e+6]Epoch 1190/3000:  40%|███▉      | 1190/3000 [12:01<12:48,  2.36it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.71e+6]Epoch 1191/3000:  40%|███▉      | 1190/3000 [12:01<12:48,  2.36it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.71e+6]Epoch 1191/3000:  40%|███▉      | 1191/3000 
[12:01<12:53,  2.34it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.71e+6]Epoch 1191/3000:  40%|███▉      | 1191/3000 [12:01<12:53,  2.34it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.71e+6]Epoch 1192/3000:  40%|███▉      | 1191/3000 [12:01<12:53,  2.34it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.71e+6]Epoch 1192/3000:  40%|███▉      | 1192/3000 [12:02<12:51,  2.34it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.71e+6]Epoch 1192/3000:  40%|███▉      | 1192/3000 [12:02<12:51,  2.34it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.71e+6]Epoch 1193/3000:  40%|███▉      | 1192/3000 [12:02<12:51,  2.34it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.71e+6]Epoch 1193/3000:  40%|███▉      | 1193/3000 [12:02<12:57,  2.32it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.71e+6]Epoch 1193/3000:  40%|███▉      | 1193/3000 [12:02<12:57,  2.32it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.71e+6]Epoch 1194/3000:  40%|███▉      | 1193/3000 [12:02<12:57,  2.32it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.71e+6]Epoch 1194/3000:  40%|███▉      | 1194/3000 [12:02<12:48,  2.35it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.71e+6]Epoch 1194/3000:  40%|███▉      | 1194/3000 [12:02<12:48,  2.35it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.71e+6]Epoch 1195/3000:  40%|███▉      | 1194/3000 [12:02<12:48,  2.35it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.71e+6]Epoch 1195/3000:  40%|███▉      | 1195/3000 [12:03<13:02,  2.31it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.71e+6]Epoch 1195/3000:  40%|███▉      | 1195/3000 [12:03<13:02,  2.31it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.71e+6]Epoch 1196/3000:  40%|███▉      | 1195/3000 [12:03<13:02,  2.31it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.71e+6]Epoch 1196/3000:  40%|███▉      | 1196/3000 [12:03<12:52,  2.34it/s, v_num=1, train_loss_step=1.73e+6, 
train_loss_epoch=1.71e+6]Epoch 1196/3000:  40%|███▉      | 1196/3000 [12:03<12:52,  2.34it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.71e+6]Epoch 1197/3000:  40%|███▉      | 1196/3000 [12:03<12:52,  2.34it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.71e+6]Epoch 1197/3000:  40%|███▉      | 1197/3000 [12:04<12:55,  2.33it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.71e+6]Epoch 1197/3000:  40%|███▉      | 1197/3000 [12:04<12:55,  2.33it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.71e+6]Epoch 1198/3000:  40%|███▉      | 1197/3000 [12:04<12:55,  2.33it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.71e+6]Epoch 1198/3000:  40%|███▉      | 1198/3000 [12:04<12:47,  2.35it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.71e+6]Epoch 1198/3000:  40%|███▉      | 1198/3000 [12:04<12:47,  2.35it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.71e+6]Epoch 1199/3000:  40%|███▉      | 1198/3000 [12:04<12:47,  2.35it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.71e+6]Epoch 1199/3000:  40%|███▉      | 1199/3000 [12:05<12:47,  2.35it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.71e+6]Epoch 1199/3000:  40%|███▉      | 1199/3000 [12:05<12:47,  2.35it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.7e+6] Epoch 1200/3000:  40%|███▉      | 1199/3000 [12:05<12:47,  2.35it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.7e+6]Epoch 1200/3000:  40%|████      | 1200/3000 [12:05<13:09,  2.28it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.7e+6]Epoch 1200/3000:  40%|████      | 1200/3000 [12:05<13:09,  2.28it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.7e+6]Epoch 1201/3000:  40%|████      | 1200/3000 [12:05<13:09,  2.28it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.7e+6]Epoch 1201/3000:  40%|████      | 1201/3000 [12:05<12:56,  2.32it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.7e+6]Epoch 1201/3000:  40%|████      | 1201/3000 [12:05<12:56,  
2.32it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.7e+6]Epoch 1202/3000:  40%|████      | 1201/3000 [12:05<12:56,  2.32it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.7e+6]Epoch 1202/3000:  40%|████      | 1202/3000 [12:06<12:13,  2.45it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.7e+6]Epoch 1202/3000:  40%|████      | 1202/3000 [12:06<12:13,  2.45it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.7e+6]Epoch 1203/3000:  40%|████      | 1202/3000 [12:06<12:13,  2.45it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.7e+6]Epoch 1203/3000:  40%|████      | 1203/3000 [12:06<12:42,  2.36it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.7e+6]Epoch 1203/3000:  40%|████      | 1203/3000 [12:06<12:42,  2.36it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.7e+6]Epoch 1204/3000:  40%|████      | 1203/3000 [12:06<12:42,  2.36it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.7e+6]Epoch 1204/3000:  40%|████      | 1204/3000 [12:07<13:09,  2.27it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.7e+6]Epoch 1204/3000:  40%|████      | 1204/3000 [12:07<13:09,  2.27it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.7e+6]Epoch 1205/3000:  40%|████      | 1204/3000 [12:07<13:09,  2.27it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.7e+6]Epoch 1205/3000:  40%|████      | 1205/3000 [12:07<12:41,  2.36it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.7e+6]Epoch 1205/3000:  40%|████      | 1205/3000 [12:07<12:41,  2.36it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.7e+6]Epoch 1206/3000:  40%|████      | 1205/3000 [12:07<12:41,  2.36it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.7e+6]Epoch 1206/3000:  40%|████      | 1206/3000 [12:08<13:20,  2.24it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.7e+6]Epoch 1206/3000:  40%|████      | 1206/3000 [12:08<13:20,  2.24it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.7e+6]Epoch 1207/3000:  40%|████ 
     | 1206/3000 [12:08<13:20,  2.24it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.7e+6]Epoch 1207/3000:  40%|████      | 1207/3000 [12:08<13:40,  2.19it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.7e+6]Epoch 1207/3000:  40%|████      | 1207/3000 [12:08<13:40,  2.19it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.7e+6]Epoch 1208/3000:  40%|████      | 1207/3000 [12:08<13:40,  2.19it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.7e+6]Epoch 1208/3000:  40%|████      | 1208/3000 [12:09<13:49,  2.16it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.7e+6]Epoch 1208/3000:  40%|████      | 1208/3000 [12:09<13:49,  2.16it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.7e+6]Epoch 1209/3000:  40%|████      | 1208/3000 [12:09<13:49,  2.16it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.7e+6]Epoch 1209/3000:  40%|████      | 1209/3000 [12:09<13:25,  2.22it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.7e+6]Epoch 1209/3000:  40%|████      | 1209/3000 [12:09<13:25,  2.22it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.7e+6]Epoch 1210/3000:  40%|████      | 1209/3000 [12:09<13:25,  2.22it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.7e+6]Epoch 1210/3000:  40%|████      | 1210/3000 [12:09<13:35,  2.20it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.7e+6]Epoch 1210/3000:  40%|████      | 1210/3000 [12:09<13:35,  2.20it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.7e+6]Epoch 1211/3000:  40%|████      | 1210/3000 [12:09<13:35,  2.20it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.7e+6]Epoch 1211/3000:  40%|████      | 1211/3000 [12:10<13:51,  2.15it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.7e+6]Epoch 1211/3000:  40%|████      | 1211/3000 [12:10<13:51,  2.15it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.7e+6]Epoch 1212/3000:  40%|████      | 1211/3000 [12:10<13:51,  2.15it/s, v_num=1, train_loss_step=1.64e+6, 
train_loss_epoch=1.7e+6]Epoch 1212/3000:  40%|████      | 1212/3000 [12:10<13:38,  2.18it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.7e+6]Epoch 1212/3000:  40%|████      | 1212/3000 [12:10<13:38,  2.18it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.7e+6] Epoch 1213/3000:  40%|████      | 1212/3000 [12:10<13:38,  2.18it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.7e+6]Epoch 1213/3000:  40%|████      | 1213/3000 [12:11<13:30,  2.20it/s, v_num=1, train_loss_step=1.8e+6, train_loss_epoch=1.7e+6]Epoch 1213/3000:  40%|████      | 1213/3000 [12:11<13:30,  2.20it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.7e+6]Epoch 1214/3000:  40%|████      | 1213/3000 [12:11<13:30,  2.20it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.7e+6]Epoch 1214/3000:  40%|████      | 1214/3000 [12:11<12:59,  2.29it/s, v_num=1, train_loss_step=1.78e+6, train_loss_epoch=1.7e+6]Epoch 1214/3000:  40%|████      | 1214/3000 [12:11<12:59,  2.29it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.7e+6]Epoch 1215/3000:  40%|████      | 1214/3000 [12:11<12:59,  2.29it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.7e+6]Epoch 1215/3000:  40%|████      | 1215/3000 [12:12<13:21,  2.23it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.7e+6]Epoch 1215/3000:  40%|████      | 1215/3000 [12:12<13:21,  2.23it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.7e+6]Epoch 1216/3000:  40%|████      | 1215/3000 [12:12<13:21,  2.23it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.7e+6]Epoch 1216/3000:  41%|████      | 1216/3000 [12:12<14:02,  2.12it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.7e+6]Epoch 1216/3000:  41%|████      | 1216/3000 [12:12<14:02,  2.12it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.7e+6]Epoch 1217/3000:  41%|████      | 1216/3000 [12:12<14:02,  2.12it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.7e+6]Epoch 1217/3000:  41%|████      | 1217/3000 [12:13<13:26,  2.21it/s, 
v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.7e+6]Epoch 1217/3000:  41%|████      | 1217/3000 [12:13<13:26,  2.21it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.7e+6]Epoch 1218/3000:  41%|████      | 1217/3000 [12:13<13:26,  2.21it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.7e+6]Epoch 1218/3000:  41%|████      | 1218/3000 [12:13<13:45,  2.16it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.7e+6]Epoch 1218/3000:  41%|████      | 1218/3000 [12:13<13:45,  2.16it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.69e+6]Epoch 1219/3000:  41%|████      | 1218/3000 [12:13<13:45,  2.16it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.69e+6]Epoch 1219/3000:  41%|████      | 1219/3000 [12:14<13:23,  2.22it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.69e+6]Epoch 1219/3000:  41%|████      | 1219/3000 [12:14<13:23,  2.22it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.69e+6]Epoch 1220/3000:  41%|████      | 1219/3000 [12:14<13:23,  2.22it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.69e+6]Epoch 1220/3000:  41%|████      | 1220/3000 [12:14<13:30,  2.20it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.69e+6]Epoch 1220/3000:  41%|████      | 1220/3000 [12:14<13:30,  2.20it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.69e+6]Epoch 1221/3000:  41%|████      | 1220/3000 [12:14<13:30,  2.20it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.69e+6]Epoch 1221/3000:  41%|████      | 1221/3000 [12:14<13:36,  2.18it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.69e+6]Epoch 1221/3000:  41%|████      | 1221/3000 [12:14<13:36,  2.18it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.69e+6]Epoch 1222/3000:  41%|████      | 1221/3000 [12:14<13:36,  2.18it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.69e+6]Epoch 1222/3000:  41%|████      | 1222/3000 [12:15<12:44,  2.32it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.69e+6]Epoch 1222/3000:  
41%|████      | 1222/3000 [12:15<12:44,  2.32it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.69e+6]Epoch 1223/3000:  41%|████      | 1222/3000 [12:15<12:44,  2.32it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.69e+6]Epoch 1223/3000:  41%|████      | 1223/3000 [12:15<13:18,  2.22it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.69e+6]Epoch 1223/3000:  41%|████      | 1223/3000 [12:15<13:18,  2.22it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.69e+6]Epoch 1224/3000:  41%|████      | 1223/3000 [12:15<13:18,  2.22it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.69e+6]Epoch 1224/3000:  41%|████      | 1224/3000 [12:16<12:40,  2.33it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.69e+6]Epoch 1224/3000:  41%|████      | 1224/3000 [12:16<12:40,  2.33it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.69e+6]Epoch 1225/3000:  41%|████      | 1224/3000 [12:16<12:40,  2.33it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.69e+6]Epoch 1225/3000:  41%|████      | 1225/3000 [12:16<13:21,  2.21it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.69e+6]Epoch 1225/3000:  41%|████      | 1225/3000 [12:16<13:21,  2.21it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.69e+6]Epoch 1226/3000:  41%|████      | 1225/3000 [12:16<13:21,  2.21it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.69e+6]Epoch 1226/3000:  41%|████      | 1226/3000 [12:17<13:21,  2.21it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.69e+6]Epoch 1226/3000:  41%|████      | 1226/3000 [12:17<13:21,  2.21it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.69e+6]Epoch 1227/3000:  41%|████      | 1226/3000 [12:17<13:21,  2.21it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.69e+6]Epoch 1227/3000:  41%|████      | 1227/3000 [12:17<13:33,  2.18it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.69e+6]Epoch 1227/3000:  41%|████      | 1227/3000 [12:17<13:33,  2.18it/s, v_num=1, 
train_loss_step=1.69e+6, train_loss_epoch=1.69e+6]Epoch 1228/3000:  41%|████      | 1227/3000 [12:17<13:33,  2.18it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.69e+6]Epoch 1228/3000:  41%|████      | 1228/3000 [12:18<14:09,  2.09it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.69e+6]Epoch 1228/3000:  41%|████      | 1228/3000 [12:18<14:09,  2.09it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.69e+6] Epoch 1229/3000:  41%|████      | 1228/3000 [12:18<14:09,  2.09it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.69e+6]Epoch 1229/3000:  41%|████      | 1229/3000 [12:18<14:05,  2.09it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.69e+6]Epoch 1229/3000:  41%|████      | 1229/3000 [12:18<14:05,  2.09it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.69e+6]Epoch 1230/3000:  41%|████      | 1229/3000 [12:18<14:05,  2.09it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.69e+6]Epoch 1230/3000:  41%|████      | 1230/3000 [12:19<13:34,  2.17it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.69e+6]Epoch 1230/3000:  41%|████      | 1230/3000 [12:19<13:34,  2.17it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.69e+6]Epoch 1231/3000:  41%|████      | 1230/3000 [12:19<13:34,  2.17it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.69e+6]Epoch 1231/3000:  41%|████      | 1231/3000 [12:19<13:37,  2.16it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.69e+6]Epoch 1231/3000:  41%|████      | 1231/3000 [12:19<13:37,  2.16it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.69e+6]Epoch 1232/3000:  41%|████      | 1231/3000 [12:19<13:37,  2.16it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.69e+6]Epoch 1232/3000:  41%|████      | 1232/3000 [12:19<13:14,  2.23it/s, v_num=1, train_loss_step=1.74e+6, train_loss_epoch=1.69e+6]Epoch 1232/3000:  41%|████      | 1232/3000 [12:19<13:14,  2.23it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.69e+6]Epoch 1233/3000:  41%|████      
| 1232/3000 [12:19<13:14,  2.23it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.69e+6]Epoch 1233/3000:  41%|████      | 1233/3000 [12:20<13:25,  2.20it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.69e+6]Epoch 1233/3000:  41%|████      | 1233/3000 [12:20<13:25,  2.20it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.69e+6]Epoch 1234/3000:  41%|████      | 1233/3000 [12:20<13:25,  2.20it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.69e+6]Epoch 1234/3000:  41%|████      | 1234/3000 [12:20<13:04,  2.25it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.69e+6]Epoch 1234/3000:  41%|████      | 1234/3000 [12:20<13:04,  2.25it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.69e+6]Epoch 1235/3000:  41%|████      | 1234/3000 [12:20<13:04,  2.25it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.69e+6]Epoch 1235/3000:  41%|████      | 1235/3000 [12:21<13:58,  2.10it/s, v_num=1, train_loss_step=1.75e+6, train_loss_epoch=1.69e+6]Epoch 1235/3000:  41%|████      | 1235/3000 [12:21<13:58,  2.10it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.69e+6]Epoch 1236/3000:  41%|████      | 1235/3000 [12:21<13:58,  2.10it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.69e+6]Epoch 1236/3000:  41%|████      | 1236/3000 [12:21<13:26,  2.19it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.69e+6]Epoch 1236/3000:  41%|████      | 1236/3000 [12:21<13:26,  2.19it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.69e+6]Epoch 1237/3000:  41%|████      | 1236/3000 [12:21<13:26,  2.19it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.69e+6]Epoch 1237/3000:  41%|████      | 1237/3000 [12:22<13:07,  2.24it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.69e+6]Epoch 1237/3000:  41%|████      | 1237/3000 [12:22<13:07,  2.24it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.69e+6]Epoch 1238/3000:  41%|████      | 1237/3000 [12:22<13:07,  2.24it/s, v_num=1, train_loss_step=1.63e+6, 
train_loss_epoch=1.69e+6]Epoch 1238/3000:  41%|████▏     | 1238/3000 [12:22<13:28,  2.18it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.69e+6]Epoch 1238/3000:  41%|████▏     | 1238/3000 [12:22<13:28,  2.18it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.68e+6]Epoch 1239/3000:  41%|████▏     | 1238/3000 [12:22<13:28,  2.18it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.68e+6]Epoch 1239/3000:  41%|████▏     | 1239/3000 [12:23<12:22,  2.37it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.68e+6]Epoch 1239/3000:  41%|████▏     | 1239/3000 [12:23<12:22,  2.37it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.68e+6]Epoch 1240/3000:  41%|████▏     | 1239/3000 [12:23<12:22,  2.37it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.68e+6]Epoch 1240/3000:  41%|████▏     | 1240/3000 [12:23<12:53,  2.28it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.68e+6]Epoch 1240/3000:  41%|████▏     | 1240/3000 [12:23<12:53,  2.28it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.68e+6]Epoch 1241/3000:  41%|████▏     | 1240/3000 [12:23<12:53,  2.28it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.68e+6]Epoch 1241/3000:  41%|████▏     | 1241/3000 [12:23<12:49,  2.28it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.68e+6]Epoch 1241/3000:  41%|████▏     | 1241/3000 [12:23<12:49,  2.28it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.68e+6]Epoch 1242/3000:  41%|████▏     | 1241/3000 [12:23<12:49,  2.28it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.68e+6]Epoch 1242/3000:  41%|████▏     | 1242/3000 [12:24<12:16,  2.39it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.68e+6]Epoch 1242/3000:  41%|████▏     | 1242/3000 [12:24<12:16,  2.39it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.68e+6]Epoch 1243/3000:  41%|████▏     | 1242/3000 [12:24<12:16,  2.39it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.68e+6]Epoch 1243/3000:  41%|████▏     | 1243/3000 
[12:24<11:32,  2.54it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.68e+6]Epoch 1243/3000:  41%|████▏     | 1243/3000 [12:24<11:32,  2.54it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.68e+6] Epoch 1244/3000:  41%|████▏     | 1243/3000 [12:24<11:32,  2.54it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.68e+6]Epoch 1244/3000:  41%|████▏     | 1244/3000 [12:25<12:00,  2.44it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.68e+6]Epoch 1244/3000:  41%|████▏     | 1244/3000 [12:25<12:00,  2.44it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.68e+6]Epoch 1245/3000:  41%|████▏     | 1244/3000 [12:25<12:00,  2.44it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.68e+6]Epoch 1245/3000:  42%|████▏     | 1245/3000 [12:25<11:52,  2.46it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.68e+6]Epoch 1245/3000:  42%|████▏     | 1245/3000 [12:25<11:52,  2.46it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.68e+6]Epoch 1246/3000:  42%|████▏     | 1245/3000 [12:25<11:52,  2.46it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.68e+6]Epoch 1246/3000:  42%|████▏     | 1246/3000 [12:25<12:06,  2.42it/s, v_num=1, train_loss_step=1.73e+6, train_loss_epoch=1.68e+6]Epoch 1246/3000:  42%|████▏     | 1246/3000 [12:25<12:06,  2.42it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.68e+6]Epoch 1247/3000:  42%|████▏     | 1246/3000 [12:25<12:06,  2.42it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.68e+6]Epoch 1247/3000:  42%|████▏     | 1247/3000 [12:26<12:41,  2.30it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.68e+6]Epoch 1247/3000:  42%|████▏     | 1247/3000 [12:26<12:41,  2.30it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.68e+6]Epoch 1248/3000:  42%|████▏     | 1247/3000 [12:26<12:41,  2.30it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.68e+6]Epoch 1248/3000:  42%|████▏     | 1248/3000 [12:26<12:55,  2.26it/s, v_num=1, train_loss_step=1.66e+6, 
train_loss_epoch=1.68e+6]Epoch 1248/3000:  42%|████▏     | 1248/3000 [12:26<12:55,  2.26it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.68e+6]Epoch 1249/3000:  42%|████▏     | 1248/3000 [12:26<12:55,  2.26it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.68e+6]Epoch 1249/3000:  42%|████▏     | 1249/3000 [12:27<13:21,  2.18it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.68e+6]Epoch 1249/3000:  42%|████▏     | 1249/3000 [12:27<13:21,  2.18it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.68e+6]Epoch 1250/3000:  42%|████▏     | 1249/3000 [12:27<13:21,  2.18it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.68e+6]Epoch 1250/3000:  42%|████▏     | 1250/3000 [12:27<13:22,  2.18it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.68e+6]Epoch 1250/3000:  42%|████▏     | 1250/3000 [12:27<13:22,  2.18it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.68e+6] Epoch 1251/3000:  42%|████▏     | 1250/3000 [12:27<13:22,  2.18it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.68e+6]Epoch 1251/3000:  42%|████▏     | 1251/3000 [12:28<13:19,  2.19it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.68e+6]Epoch 1251/3000:  42%|████▏     | 1251/3000 [12:28<13:19,  2.19it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.68e+6]Epoch 1252/3000:  42%|████▏     | 1251/3000 [12:28<13:19,  2.19it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.68e+6]Epoch 1252/3000:  42%|████▏     | 1252/3000 [12:28<12:34,  2.32it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.68e+6]Epoch 1252/3000:  42%|████▏     | 1252/3000 [12:28<12:34,  2.32it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.68e+6]Epoch 1253/3000:  42%|████▏     | 1252/3000 [12:28<12:34,  2.32it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.68e+6]Epoch 1253/3000:  42%|████▏     | 1253/3000 [12:29<12:17,  2.37it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.68e+6]Epoch 1253/3000:  42%|████▏     | 1253/3000 
[12:29<12:17,  2.37it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.68e+6]Epoch 1254/3000:  42%|████▏     | 1253/3000 [12:29<12:17,  2.37it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.68e+6]Epoch 1254/3000:  42%|████▏     | 1254/3000 [12:29<12:52,  2.26it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.68e+6]Epoch 1254/3000:  42%|████▏     | 1254/3000 [12:29<12:52,  2.26it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.68e+6]Epoch 1255/3000:  42%|████▏     | 1254/3000 [12:29<12:52,  2.26it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.68e+6]Epoch 1255/3000:  42%|████▏     | 1255/3000 [12:29<12:31,  2.32it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.68e+6]Epoch 1255/3000:  42%|████▏     | 1255/3000 [12:29<12:31,  2.32it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.68e+6]Epoch 1256/3000:  42%|████▏     | 1255/3000 [12:29<12:31,  2.32it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.68e+6]Epoch 1256/3000:  42%|████▏     | 1256/3000 [12:30<12:29,  2.33it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.68e+6]Epoch 1256/3000:  42%|████▏     | 1256/3000 [12:30<12:29,  2.33it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.68e+6]Epoch 1257/3000:  42%|████▏     | 1256/3000 [12:30<12:29,  2.33it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.68e+6]Epoch 1257/3000:  42%|████▏     | 1257/3000 [12:30<12:18,  2.36it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.68e+6]Epoch 1257/3000:  42%|████▏     | 1257/3000 [12:30<12:18,  2.36it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.68e+6]Epoch 1258/3000:  42%|████▏     | 1257/3000 [12:30<12:18,  2.36it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.68e+6]Epoch 1258/3000:  42%|████▏     | 1258/3000 [12:31<12:01,  2.42it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.68e+6]Epoch 1258/3000:  42%|████▏     | 1258/3000 [12:31<12:01,  2.42it/s, v_num=1, train_loss_step=1.62e+6, 
train_loss_epoch=1.67e+6]Epoch 1259/3000:  42%|████▏     | 1258/3000 [12:31<12:01,  2.42it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.67e+6]Epoch 1259/3000:  42%|████▏     | 1259/3000 [12:31<12:05,  2.40it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.67e+6]Epoch 1259/3000:  42%|████▏     | 1259/3000 [12:31<12:05,  2.40it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.67e+6]Epoch 1260/3000:  42%|████▏     | 1259/3000 [12:31<12:05,  2.40it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.67e+6]Epoch 1260/3000:  42%|████▏     | 1260/3000 [12:32<12:13,  2.37it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.67e+6]Epoch 1260/3000:  42%|████▏     | 1260/3000 [12:32<12:13,  2.37it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.67e+6]Epoch 1261/3000:  42%|████▏     | 1260/3000 [12:32<12:13,  2.37it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.67e+6]Epoch 1261/3000:  42%|████▏     | 1261/3000 [12:32<12:21,  2.34it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.67e+6]Epoch 1261/3000:  42%|████▏     | 1261/3000 [12:32<12:21,  2.34it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.67e+6]Epoch 1262/3000:  42%|████▏     | 1261/3000 [12:32<12:21,  2.34it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.67e+6]Epoch 1262/3000:  42%|████▏     | 1262/3000 [12:32<11:43,  2.47it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.67e+6]Epoch 1262/3000:  42%|████▏     | 1262/3000 [12:32<11:43,  2.47it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.67e+6] Epoch 1263/3000:  42%|████▏     | 1262/3000 [12:32<11:43,  2.47it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.67e+6]Epoch 1263/3000:  42%|████▏     | 1263/3000 [12:33<11:15,  2.57it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.67e+6]Epoch 1263/3000:  42%|████▏     | 1263/3000 [12:33<11:15,  2.57it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.67e+6]Epoch 1264/3000:  42%|████▏     | 1263/3000 
[12:33<11:15,  2.57it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.67e+6]Epoch 1264/3000:  42%|████▏     | 1264/3000 [12:33<11:24,  2.54it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.67e+6]Epoch 1264/3000:  42%|████▏     | 1264/3000 [12:33<11:24,  2.54it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.67e+6]Epoch 1265/3000:  42%|████▏     | 1264/3000 [12:33<11:24,  2.54it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.67e+6]Epoch 1265/3000:  42%|████▏     | 1265/3000 [12:34<11:49,  2.44it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.67e+6]Epoch 1265/3000:  42%|████▏     | 1265/3000 [12:34<11:49,  2.44it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.67e+6]Epoch 1266/3000:  42%|████▏     | 1265/3000 [12:34<11:49,  2.44it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.67e+6]Epoch 1266/3000:  42%|████▏     | 1266/3000 [12:34<12:34,  2.30it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.67e+6]Epoch 1266/3000:  42%|████▏     | 1266/3000 [12:34<12:34,  2.30it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.67e+6]Epoch 1267/3000:  42%|████▏     | 1266/3000 [12:34<12:34,  2.30it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.67e+6]Epoch 1267/3000:  42%|████▏     | 1267/3000 [12:35<12:48,  2.26it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.67e+6]Epoch 1267/3000:  42%|████▏     | 1267/3000 [12:35<12:48,  2.26it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.67e+6] Epoch 1268/3000:  42%|████▏     | 1267/3000 [12:35<12:48,  2.26it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.67e+6]Epoch 1268/3000:  42%|████▏     | 1268/3000 [12:35<12:29,  2.31it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.67e+6]Epoch 1268/3000:  42%|████▏     | 1268/3000 [12:35<12:29,  2.31it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.67e+6]Epoch 1269/3000:  42%|████▏     | 1268/3000 [12:35<12:29,  2.31it/s, v_num=1, train_loss_step=1.66e+6, 
train_loss_epoch=1.67e+6]Epoch 1269/3000:  42%|████▏     | 1269/3000 [12:35<12:09,  2.37it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.67e+6]Epoch 1269/3000:  42%|████▏     | 1269/3000 [12:35<12:09,  2.37it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.67e+6]Epoch 1270/3000:  42%|████▏     | 1269/3000 [12:35<12:09,  2.37it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.67e+6]Epoch 1270/3000:  42%|████▏     | 1270/3000 [12:36<12:22,  2.33it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.67e+6]Epoch 1270/3000:  42%|████▏     | 1270/3000 [12:36<12:22,  2.33it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.67e+6]Epoch 1271/3000:  42%|████▏     | 1270/3000 [12:36<12:22,  2.33it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.67e+6]Epoch 1271/3000:  42%|████▏     | 1271/3000 [12:36<12:53,  2.24it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.67e+6]Epoch 1271/3000:  42%|████▏     | 1271/3000 [12:36<12:53,  2.24it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.67e+6]Epoch 1272/3000:  42%|████▏     | 1271/3000 [12:36<12:53,  2.24it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.67e+6]Epoch 1272/3000:  42%|████▏     | 1272/3000 [12:37<13:02,  2.21it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.67e+6]Epoch 1272/3000:  42%|████▏     | 1272/3000 [12:37<13:02,  2.21it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.67e+6]Epoch 1273/3000:  42%|████▏     | 1272/3000 [12:37<13:02,  2.21it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.67e+6]Epoch 1273/3000:  42%|████▏     | 1273/3000 [12:37<12:40,  2.27it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.67e+6]Epoch 1273/3000:  42%|████▏     | 1273/3000 [12:37<12:40,  2.27it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.67e+6]Epoch 1274/3000:  42%|████▏     | 1273/3000 [12:37<12:40,  2.27it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.67e+6]Epoch 1274/3000:  42%|████▏     | 1274/3000 
[12:38<12:43,  2.26it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.67e+6]Epoch 1274/3000:  42%|████▏     | 1274/3000 [12:38<12:43,  2.26it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.67e+6] Epoch 1275/3000:  42%|████▏     | 1274/3000 [12:38<12:43,  2.26it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.67e+6]Epoch 1275/3000:  42%|████▎     | 1275/3000 [12:38<12:51,  2.24it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.67e+6]Epoch 1275/3000:  42%|████▎     | 1275/3000 [12:38<12:51,  2.24it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.67e+6]Epoch 1276/3000:  42%|████▎     | 1275/3000 [12:38<12:51,  2.24it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.67e+6]Epoch 1276/3000:  43%|████▎     | 1276/3000 [12:38<12:53,  2.23it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.67e+6]Epoch 1276/3000:  43%|████▎     | 1276/3000 [12:38<12:53,  2.23it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.67e+6]Epoch 1277/3000:  43%|████▎     | 1276/3000 [12:38<12:53,  2.23it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.67e+6]Epoch 1277/3000:  43%|████▎     | 1277/3000 [12:39<12:54,  2.22it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.67e+6]Epoch 1277/3000:  43%|████▎     | 1277/3000 [12:39<12:54,  2.22it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.67e+6]Epoch 1278/3000:  43%|████▎     | 1277/3000 [12:39<12:54,  2.22it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.67e+6]Epoch 1278/3000:  43%|████▎     | 1278/3000 [12:39<12:52,  2.23it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.67e+6]Epoch 1278/3000:  43%|████▎     | 1278/3000 [12:39<12:52,  2.23it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.67e+6]Epoch 1279/3000:  43%|████▎     | 1278/3000 [12:39<12:52,  2.23it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.67e+6]Epoch 1279/3000:  43%|████▎     | 1279/3000 [12:40<12:23,  2.32it/s, v_num=1, train_loss_step=1.63e+6, 
train_loss_epoch=1.67e+6]Epoch 1279/3000:  43%|████▎     | 1279/3000 [12:40<12:23,  2.32it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.67e+6]Epoch 1280/3000:  43%|████▎     | 1279/3000 [12:40<12:23,  2.32it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.67e+6]Epoch 1280/3000:  43%|████▎     | 1280/3000 [12:40<12:25,  2.31it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.67e+6]Epoch 1280/3000:  43%|████▎     | 1280/3000 [12:40<12:25,  2.31it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.66e+6]Epoch 1281/3000:  43%|████▎     | 1280/3000 [12:40<12:25,  2.31it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.66e+6]Epoch 1281/3000:  43%|████▎     | 1281/3000 [12:41<11:57,  2.39it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.66e+6]Epoch 1281/3000:  43%|████▎     | 1281/3000 [12:41<11:57,  2.39it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.66e+6]Epoch 1282/3000:  43%|████▎     | 1281/3000 [12:41<11:57,  2.39it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.66e+6]Epoch 1282/3000:  43%|████▎     | 1282/3000 [12:41<11:41,  2.45it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.66e+6]Epoch 1282/3000:  43%|████▎     | 1282/3000 [12:41<11:41,  2.45it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.66e+6]Epoch 1283/3000:  43%|████▎     | 1282/3000 [12:41<11:41,  2.45it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.66e+6]Epoch 1283/3000:  43%|████▎     | 1283/3000 [12:41<12:05,  2.37it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.66e+6]Epoch 1283/3000:  43%|████▎     | 1283/3000 [12:41<12:05,  2.37it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.66e+6]Epoch 1284/3000:  43%|████▎     | 1283/3000 [12:41<12:05,  2.37it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.66e+6]Epoch 1284/3000:  43%|████▎     | 1284/3000 [12:42<12:11,  2.35it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.66e+6]Epoch 1284/3000:  43%|████▎     | 1284/3000 
[12:42<12:11,  2.35it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.66e+6]Epoch 1285/3000:  43%|████▎     | 1284/3000 [12:42<12:11,  2.35it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.66e+6]Epoch 1285/3000:  43%|████▎     | 1285/3000 [12:42<12:25,  2.30it/s, v_num=1, train_loss_step=1.76e+6, train_loss_epoch=1.66e+6]Epoch 1285/3000:  43%|████▎     | 1285/3000 [12:42<12:25,  2.30it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.66e+6]Epoch 1286/3000:  43%|████▎     | 1285/3000 [12:42<12:25,  2.30it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.66e+6]Epoch 1286/3000:  43%|████▎     | 1286/3000 [12:43<12:54,  2.21it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.66e+6]Epoch 1286/3000:  43%|████▎     | 1286/3000 [12:43<12:54,  2.21it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.66e+6]Epoch 1287/3000:  43%|████▎     | 1286/3000 [12:43<12:54,  2.21it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.66e+6]Epoch 1287/3000:  43%|████▎     | 1287/3000 [12:43<12:50,  2.22it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.66e+6]Epoch 1287/3000:  43%|████▎     | 1287/3000 [12:43<12:50,  2.22it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.66e+6]Epoch 1288/3000:  43%|████▎     | 1287/3000 [12:43<12:50,  2.22it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.66e+6]Epoch 1288/3000:  43%|████▎     | 1288/3000 [12:44<12:54,  2.21it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.66e+6]Epoch 1288/3000:  43%|████▎     | 1288/3000 [12:44<12:54,  2.21it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.66e+6]Epoch 1289/3000:  43%|████▎     | 1288/3000 [12:44<12:54,  2.21it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.66e+6]Epoch 1289/3000:  43%|████▎     | 1289/3000 [12:44<13:03,  2.18it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.66e+6]Epoch 1289/3000:  43%|████▎     | 1289/3000 [12:44<13:03,  2.18it/s, v_num=1, train_loss_step=1.63e+6, 
train_loss_epoch=1.66e+6]Epoch 1290/3000:  43%|████▎     | 1289/3000 [12:44<13:03,  2.18it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.66e+6]Epoch 1290/3000:  43%|████▎     | 1290/3000 [12:45<12:46,  2.23it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.66e+6]Epoch 1290/3000:  43%|████▎     | 1290/3000 [12:45<12:46,  2.23it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.66e+6]Epoch 1291/3000:  43%|████▎     | 1290/3000 [12:45<12:46,  2.23it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.66e+6]Epoch 1291/3000:  43%|████▎     | 1291/3000 [12:45<12:15,  2.32it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.66e+6]Epoch 1291/3000:  43%|████▎     | 1291/3000 [12:45<12:15,  2.32it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.66e+6]Epoch 1292/3000:  43%|████▎     | 1291/3000 [12:45<12:15,  2.32it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.66e+6]Epoch 1292/3000:  43%|████▎     | 1292/3000 [12:45<12:32,  2.27it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.66e+6]Epoch 1292/3000:  43%|████▎     | 1292/3000 [12:45<12:32,  2.27it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.66e+6]Epoch 1293/3000:  43%|████▎     | 1292/3000 [12:45<12:32,  2.27it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.66e+6]Epoch 1293/3000:  43%|████▎     | 1293/3000 [12:46<12:47,  2.22it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.66e+6]Epoch 1293/3000:  43%|████▎     | 1293/3000 [12:46<12:47,  2.22it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.66e+6]Epoch 1294/3000:  43%|████▎     | 1293/3000 [12:46<12:47,  2.22it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.66e+6]Epoch 1294/3000:  43%|████▎     | 1294/3000 [12:46<12:19,  2.31it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.66e+6]Epoch 1294/3000:  43%|████▎     | 1294/3000 [12:46<12:19,  2.31it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.66e+6]Epoch 1295/3000:  43%|████▎     | 1294/3000 
[12:46<12:19,  2.31it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.66e+6]Epoch 1295/3000:  43%|████▎     | 1295/3000 [12:47<11:52,  2.39it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.66e+6]Epoch 1295/3000:  43%|████▎     | 1295/3000 [12:47<11:52,  2.39it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.66e+6]Epoch 1296/3000:  43%|████▎     | 1295/3000 [12:47<11:52,  2.39it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.66e+6]Epoch 1296/3000:  43%|████▎     | 1296/3000 [12:47<12:10,  2.33it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.66e+6]Epoch 1296/3000:  43%|████▎     | 1296/3000 [12:47<12:10,  2.33it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.66e+6]Epoch 1297/3000:  43%|████▎     | 1296/3000 [12:47<12:10,  2.33it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.66e+6]Epoch 1297/3000:  43%|████▎     | 1297/3000 [12:48<12:39,  2.24it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.66e+6]Epoch 1297/3000:  43%|████▎     | 1297/3000 [12:48<12:39,  2.24it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.66e+6]Epoch 1298/3000:  43%|████▎     | 1297/3000 [12:48<12:39,  2.24it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.66e+6]Epoch 1298/3000:  43%|████▎     | 1298/3000 [12:48<12:26,  2.28it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.66e+6]Epoch 1298/3000:  43%|████▎     | 1298/3000 [12:48<12:26,  2.28it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.66e+6]Epoch 1299/3000:  43%|████▎     | 1298/3000 [12:48<12:26,  2.28it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.66e+6]Epoch 1299/3000:  43%|████▎     | 1299/3000 [12:49<12:26,  2.28it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.66e+6]Epoch 1299/3000:  43%|████▎     | 1299/3000 [12:49<12:26,  2.28it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.66e+6]Epoch 1300/3000:  43%|████▎     | 1299/3000 [12:49<12:26,  2.28it/s, v_num=1, train_loss_step=1.67e+6, 
train_loss_epoch=1.66e+6]Epoch 1300/3000:  43%|████▎     | 1300/3000 [12:49<12:16,  2.31it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.66e+6]Epoch 1300/3000:  43%|████▎     | 1300/3000 [12:49<12:16,  2.31it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.66e+6]Epoch 1301/3000:  43%|████▎     | 1300/3000 [12:49<12:16,  2.31it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.66e+6]Epoch 1301/3000:  43%|████▎     | 1301/3000 [12:49<12:10,  2.33it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.66e+6]Epoch 1301/3000:  43%|████▎     | 1301/3000 [12:49<12:10,  2.33it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.66e+6] Epoch 1302/3000:  43%|████▎     | 1301/3000 [12:49<12:10,  2.33it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.66e+6]Epoch 1302/3000:  43%|████▎     | 1302/3000 [12:50<12:05,  2.34it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.66e+6]Epoch 1302/3000:  43%|████▎     | 1302/3000 [12:50<12:05,  2.34it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.65e+6]Epoch 1303/3000:  43%|████▎     | 1302/3000 [12:50<12:05,  2.34it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.65e+6]Epoch 1303/3000:  43%|████▎     | 1303/3000 [12:50<12:15,  2.31it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.65e+6]Epoch 1303/3000:  43%|████▎     | 1303/3000 [12:50<12:15,  2.31it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.65e+6]Epoch 1304/3000:  43%|████▎     | 1303/3000 [12:50<12:15,  2.31it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.65e+6]Epoch 1304/3000:  43%|████▎     | 1304/3000 [12:51<12:14,  2.31it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.65e+6]Epoch 1304/3000:  43%|████▎     | 1304/3000 [12:51<12:14,  2.31it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.65e+6]Epoch 1305/3000:  43%|████▎     | 1304/3000 [12:51<12:14,  2.31it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.65e+6]Epoch 1305/3000:  44%|████▎     | 1305/3000 
[12:51<12:09,  2.32it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.65e+6]Epoch 1305/3000:  44%|████▎     | 1305/3000 [12:51<12:09,  2.32it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.65e+6]Epoch 1306/3000:  44%|████▎     | 1305/3000 [12:51<12:09,  2.32it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.65e+6]Epoch 1306/3000:  44%|████▎     | 1306/3000 [12:52<12:23,  2.28it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.65e+6]Epoch 1306/3000:  44%|████▎     | 1306/3000 [12:52<12:23,  2.28it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.65e+6]Epoch 1307/3000:  44%|████▎     | 1306/3000 [12:52<12:23,  2.28it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.65e+6]Epoch 1307/3000:  44%|████▎     | 1307/3000 [12:52<12:50,  2.20it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.65e+6]Epoch 1307/3000:  44%|████▎     | 1307/3000 [12:52<12:50,  2.20it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.65e+6]Epoch 1308/3000:  44%|████▎     | 1307/3000 [12:52<12:50,  2.20it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.65e+6]Epoch 1308/3000:  44%|████▎     | 1308/3000 [12:52<12:34,  2.24it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.65e+6]Epoch 1308/3000:  44%|████▎     | 1308/3000 [12:52<12:34,  2.24it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.65e+6]Epoch 1309/3000:  44%|████▎     | 1308/3000 [12:52<12:34,  2.24it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.65e+6]Epoch 1309/3000:  44%|████▎     | 1309/3000 [12:53<12:39,  2.23it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.65e+6]Epoch 1309/3000:  44%|████▎     | 1309/3000 [12:53<12:39,  2.23it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.65e+6] Epoch 1310/3000:  44%|████▎     | 1309/3000 [12:53<12:39,  2.23it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.65e+6]Epoch 1310/3000:  44%|████▎     | 1310/3000 [12:53<12:46,  2.20it/s, v_num=1, train_loss_step=1.7e+6, 
train_loss_epoch=1.65e+6]Epoch 1310/3000:  44%|████▎     | 1310/3000 [12:53<12:46,  2.20it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.65e+6]Epoch 1311/3000:  44%|████▎     | 1310/3000 [12:53<12:46,  2.20it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.65e+6]Epoch 1311/3000:  44%|████▎     | 1311/3000 [12:54<11:55,  2.36it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.65e+6]Epoch 1311/3000:  44%|████▎     | 1311/3000 [12:54<11:55,  2.36it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.65e+6]Epoch 1312/3000:  44%|████▎     | 1311/3000 [12:54<11:55,  2.36it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.65e+6]Epoch 1312/3000:  44%|████▎     | 1312/3000 [12:54<11:46,  2.39it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.65e+6]Epoch 1312/3000:  44%|████▎     | 1312/3000 [12:54<11:46,  2.39it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.65e+6]Epoch 1313/3000:  44%|████▎     | 1312/3000 [12:54<11:46,  2.39it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.65e+6]Epoch 1313/3000:  44%|████▍     | 1313/3000 [12:55<11:34,  2.43it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.65e+6]Epoch 1313/3000:  44%|████▍     | 1313/3000 [12:55<11:34,  2.43it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.65e+6]Epoch 1314/3000:  44%|████▍     | 1313/3000 [12:55<11:34,  2.43it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.65e+6]Epoch 1314/3000:  44%|████▍     | 1314/3000 [12:55<11:52,  2.37it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.65e+6]Epoch 1314/3000:  44%|████▍     | 1314/3000 [12:55<11:52,  2.37it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.65e+6]Epoch 1315/3000:  44%|████▍     | 1314/3000 [12:55<11:52,  2.37it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.65e+6]Epoch 1315/3000:  44%|████▍     | 1315/3000 [12:55<12:18,  2.28it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.65e+6]Epoch 1315/3000:  44%|████▍     | 1315/3000 
[12:55<12:18,  2.28it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.65e+6]Epoch 1316/3000:  44%|████▍     | 1315/3000 [12:55<12:18,  2.28it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.65e+6]Epoch 1316/3000:  44%|████▍     | 1316/3000 [12:56<12:02,  2.33it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.65e+6]Epoch 1316/3000:  44%|████▍     | 1316/3000 [12:56<12:02,  2.33it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.65e+6]Epoch 1317/3000:  44%|████▍     | 1316/3000 [12:56<12:02,  2.33it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.65e+6]Epoch 1317/3000:  44%|████▍     | 1317/3000 [12:56<12:23,  2.26it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.65e+6]Epoch 1317/3000:  44%|████▍     | 1317/3000 [12:56<12:23,  2.26it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.65e+6]Epoch 1318/3000:  44%|████▍     | 1317/3000 [12:56<12:23,  2.26it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.65e+6]Epoch 1318/3000:  44%|████▍     | 1318/3000 [12:57<12:17,  2.28it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.65e+6]Epoch 1318/3000:  44%|████▍     | 1318/3000 [12:57<12:17,  2.28it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.65e+6]Epoch 1319/3000:  44%|████▍     | 1318/3000 [12:57<12:17,  2.28it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.65e+6]Epoch 1319/3000:  44%|████▍     | 1319/3000 [12:57<12:22,  2.26it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.65e+6]Epoch 1319/3000:  44%|████▍     | 1319/3000 [12:57<12:22,  2.26it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.65e+6]Epoch 1320/3000:  44%|████▍     | 1319/3000 [12:57<12:22,  2.26it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.65e+6]Epoch 1320/3000:  44%|████▍     | 1320/3000 [12:58<12:20,  2.27it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.65e+6]Epoch 1320/3000:  44%|████▍     | 1320/3000 [12:58<12:20,  2.27it/s, v_num=1, train_loss_step=1.69e+6, 
train_loss_epoch=1.65e+6]Epoch 1321/3000:  44%|████▍     | 1320/3000 [12:58<12:20,  2.27it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.65e+6]Epoch 1321/3000:  44%|████▍     | 1321/3000 [12:58<12:25,  2.25it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.65e+6]Epoch 1321/3000:  44%|████▍     | 1321/3000 [12:58<12:25,  2.25it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.65e+6]Epoch 1322/3000:  44%|████▍     | 1321/3000 [12:58<12:25,  2.25it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.65e+6]Epoch 1322/3000:  44%|████▍     | 1322/3000 [12:59<12:07,  2.31it/s, v_num=1, train_loss_step=1.72e+6, train_loss_epoch=1.65e+6]Epoch 1322/3000:  44%|████▍     | 1322/3000 [12:59<12:07,  2.31it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.65e+6] Epoch 1323/3000:  44%|████▍     | 1322/3000 [12:59<12:07,  2.31it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.65e+6]Epoch 1323/3000:  44%|████▍     | 1323/3000 [12:59<11:53,  2.35it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.65e+6]Epoch 1323/3000:  44%|████▍     | 1323/3000 [12:59<11:53,  2.35it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.65e+6]Epoch 1324/3000:  44%|████▍     | 1323/3000 [12:59<11:53,  2.35it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.65e+6]Epoch 1324/3000:  44%|████▍     | 1324/3000 [12:59<12:16,  2.28it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.65e+6]Epoch 1324/3000:  44%|████▍     | 1324/3000 [12:59<12:16,  2.28it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.65e+6]Epoch 1325/3000:  44%|████▍     | 1324/3000 [12:59<12:16,  2.28it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.65e+6]Epoch 1325/3000:  44%|████▍     | 1325/3000 [13:00<12:16,  2.28it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.65e+6]Epoch 1325/3000:  44%|████▍     | 1325/3000 [13:00<12:16,  2.28it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.65e+6]Epoch 1326/3000:  44%|████▍     | 1325/3000 
[13:00<12:16,  2.28it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.65e+6]Epoch 1326/3000:  44%|████▍     | 1326/3000 [13:00<12:14,  2.28it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.65e+6]Epoch 1326/3000:  44%|████▍     | 1326/3000 [13:00<12:14,  2.28it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.64e+6] Epoch 1327/3000:  44%|████▍     | 1326/3000 [13:00<12:14,  2.28it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.64e+6]Epoch 1327/3000:  44%|████▍     | 1327/3000 [13:01<12:31,  2.22it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.64e+6]Epoch 1327/3000:  44%|████▍     | 1327/3000 [13:01<12:31,  2.22it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.64e+6]Epoch 1328/3000:  44%|████▍     | 1327/3000 [13:01<12:31,  2.22it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.64e+6]Epoch 1328/3000:  44%|████▍     | 1328/3000 [13:01<12:48,  2.17it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.64e+6]Epoch 1328/3000:  44%|████▍     | 1328/3000 [13:01<12:48,  2.17it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.64e+6]Epoch 1329/3000:  44%|████▍     | 1328/3000 [13:01<12:48,  2.17it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.64e+6]Epoch 1329/3000:  44%|████▍     | 1329/3000 [13:02<12:16,  2.27it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.64e+6]Epoch 1329/3000:  44%|████▍     | 1329/3000 [13:02<12:16,  2.27it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.64e+6]Epoch 1330/3000:  44%|████▍     | 1329/3000 [13:02<12:16,  2.27it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.64e+6]Epoch 1330/3000:  44%|████▍     | 1330/3000 [13:02<12:04,  2.31it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.64e+6]Epoch 1330/3000:  44%|████▍     | 1330/3000 [13:02<12:04,  2.31it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.64e+6]Epoch 1331/3000:  44%|████▍     | 1330/3000 [13:02<12:04,  2.31it/s, v_num=1, train_loss_step=1.59e+6, 
train_loss_epoch=1.64e+6]Epoch 1331/3000:  44%|████▍     | 1331/3000 [13:03<12:13,  2.28it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.64e+6]Epoch 1331/3000:  44%|████▍     | 1331/3000 [13:03<12:13,  2.28it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.64e+6]Epoch 1332/3000:  44%|████▍     | 1331/3000 [13:03<12:13,  2.28it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.64e+6]Epoch 1332/3000:  44%|████▍     | 1332/3000 [13:03<12:50,  2.17it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.64e+6]Epoch 1332/3000:  44%|████▍     | 1332/3000 [13:03<12:50,  2.17it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.64e+6]Epoch 1333/3000:  44%|████▍     | 1332/3000 [13:03<12:50,  2.17it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.64e+6]Epoch 1333/3000:  44%|████▍     | 1333/3000 [13:03<11:53,  2.34it/s, v_num=1, train_loss_step=1.71e+6, train_loss_epoch=1.64e+6]Epoch 1333/3000:  44%|████▍     | 1333/3000 [13:03<11:53,  2.34it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.64e+6]Epoch 1334/3000:  44%|████▍     | 1333/3000 [13:03<11:53,  2.34it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.64e+6]Epoch 1334/3000:  44%|████▍     | 1334/3000 [13:04<11:35,  2.40it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.64e+6]Epoch 1334/3000:  44%|████▍     | 1334/3000 [13:04<11:35,  2.40it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.64e+6]Epoch 1335/3000:  44%|████▍     | 1334/3000 [13:04<11:35,  2.40it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.64e+6]Epoch 1335/3000:  44%|████▍     | 1335/3000 [13:04<11:22,  2.44it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.64e+6]Epoch 1335/3000:  44%|████▍     | 1335/3000 [13:04<11:22,  2.44it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.64e+6]Epoch 1336/3000:  44%|████▍     | 1335/3000 [13:04<11:22,  2.44it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.64e+6]Epoch 1336/3000:  45%|████▍     | 1336/3000 
[13:05<11:09,  2.49it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.64e+6]Epoch 1336/3000:  45%|████▍     | 1336/3000 [13:05<11:09,  2.49it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.64e+6]Epoch 1337/3000:  45%|████▍     | 1336/3000 [13:05<11:09,  2.49it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.64e+6]Epoch 1337/3000:  45%|████▍     | 1337/3000 [13:05<11:33,  2.40it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.64e+6]Epoch 1337/3000:  45%|████▍     | 1337/3000 [13:05<11:33,  2.40it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.64e+6]Epoch 1338/3000:  45%|████▍     | 1337/3000 [13:05<11:33,  2.40it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.64e+6]Epoch 1338/3000:  45%|████▍     | 1338/3000 [13:05<12:03,  2.30it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.64e+6]Epoch 1338/3000:  45%|████▍     | 1338/3000 [13:05<12:03,  2.30it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.64e+6] Epoch 1339/3000:  45%|████▍     | 1338/3000 [13:05<12:03,  2.30it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.64e+6]Epoch 1339/3000:  45%|████▍     | 1339/3000 [13:06<12:41,  2.18it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.64e+6]Epoch 1339/3000:  45%|████▍     | 1339/3000 [13:06<12:41,  2.18it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.64e+6]Epoch 1340/3000:  45%|████▍     | 1339/3000 [13:06<12:41,  2.18it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.64e+6]Epoch 1340/3000:  45%|████▍     | 1340/3000 [13:06<12:53,  2.15it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.64e+6]Epoch 1340/3000:  45%|████▍     | 1340/3000 [13:06<12:53,  2.15it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.64e+6]Epoch 1341/3000:  45%|████▍     | 1340/3000 [13:06<12:53,  2.15it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.64e+6]Epoch 1341/3000:  45%|████▍     | 1341/3000 [13:07<12:15,  2.25it/s, v_num=1, train_loss_step=1.59e+6, 
train_loss_epoch=1.64e+6]Epoch 1341/3000:  45%|████▍     | 1341/3000 [13:07<12:15,  2.25it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.64e+6]Epoch 1342/3000:  45%|████▍     | 1341/3000 [13:07<12:15,  2.25it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.64e+6]Epoch 1342/3000:  45%|████▍     | 1342/3000 [13:07<12:09,  2.27it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.64e+6]Epoch 1342/3000:  45%|████▍     | 1342/3000 [13:07<12:09,  2.27it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.64e+6]Epoch 1343/3000:  45%|████▍     | 1342/3000 [13:07<12:09,  2.27it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.64e+6]Epoch 1343/3000:  45%|████▍     | 1343/3000 [13:08<12:50,  2.15it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.64e+6]Epoch 1343/3000:  45%|████▍     | 1343/3000 [13:08<12:50,  2.15it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.64e+6]Epoch 1344/3000:  45%|████▍     | 1343/3000 [13:08<12:50,  2.15it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.64e+6]Epoch 1344/3000:  45%|████▍     | 1344/3000 [13:08<12:39,  2.18it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.64e+6]Epoch 1344/3000:  45%|████▍     | 1344/3000 [13:08<12:39,  2.18it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.64e+6]Epoch 1345/3000:  45%|████▍     | 1344/3000 [13:08<12:39,  2.18it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.64e+6]Epoch 1345/3000:  45%|████▍     | 1345/3000 [13:09<12:53,  2.14it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.64e+6]Epoch 1345/3000:  45%|████▍     | 1345/3000 [13:09<12:53,  2.14it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.64e+6]Epoch 1346/3000:  45%|████▍     | 1345/3000 [13:09<12:53,  2.14it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.64e+6]Epoch 1346/3000:  45%|████▍     | 1346/3000 [13:09<12:08,  2.27it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.64e+6]Epoch 1346/3000:  45%|████▍     | 1346/3000 
[13:09<12:08,  2.27it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.64e+6]Epoch 1347/3000:  45%|████▍     | 1346/3000 [13:09<12:08,  2.27it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.64e+6]Epoch 1347/3000:  45%|████▍     | 1347/3000 [13:10<11:41,  2.36it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.64e+6]Epoch 1347/3000:  45%|████▍     | 1347/3000 [13:10<11:41,  2.36it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.64e+6]Epoch 1348/3000:  45%|████▍     | 1347/3000 [13:10<11:41,  2.36it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.64e+6]Epoch 1348/3000:  45%|████▍     | 1348/3000 [13:10<11:17,  2.44it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.64e+6]Epoch 1348/3000:  45%|████▍     | 1348/3000 [13:10<11:17,  2.44it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.64e+6] Epoch 1349/3000:  45%|████▍     | 1348/3000 [13:10<11:17,  2.44it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.64e+6]Epoch 1349/3000:  45%|████▍     | 1349/3000 [13:10<11:32,  2.38it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.64e+6]Epoch 1349/3000:  45%|████▍     | 1349/3000 [13:10<11:32,  2.38it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.64e+6]Epoch 1350/3000:  45%|████▍     | 1349/3000 [13:10<11:32,  2.38it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.64e+6]Epoch 1350/3000:  45%|████▌     | 1350/3000 [13:11<10:53,  2.53it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.64e+6]Epoch 1350/3000:  45%|████▌     | 1350/3000 [13:11<10:53,  2.53it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.63e+6]Epoch 1351/3000:  45%|████▌     | 1350/3000 [13:11<10:53,  2.53it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.63e+6]Epoch 1351/3000:  45%|████▌     | 1351/3000 [13:11<11:07,  2.47it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.63e+6]Epoch 1351/3000:  45%|████▌     | 1351/3000 [13:11<11:07,  2.47it/s, v_num=1, train_loss_step=1.58e+6, 
train_loss_epoch=1.63e+6]Epoch 1352/3000:  45%|████▌     | 1351/3000 [13:11<11:07,  2.47it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.63e+6]Epoch 1352/3000:  45%|████▌     | 1352/3000 [13:12<12:00,  2.29it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.63e+6]Epoch 1352/3000:  45%|████▌     | 1352/3000 [13:12<12:00,  2.29it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.63e+6]Epoch 1353/3000:  45%|████▌     | 1352/3000 [13:12<12:00,  2.29it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.63e+6]Epoch 1353/3000:  45%|████▌     | 1353/3000 [13:12<11:50,  2.32it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.63e+6]Epoch 1353/3000:  45%|████▌     | 1353/3000 [13:12<11:50,  2.32it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.63e+6]Epoch 1354/3000:  45%|████▌     | 1353/3000 [13:12<11:50,  2.32it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.63e+6]Epoch 1354/3000:  45%|████▌     | 1354/3000 [13:12<11:44,  2.34it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.63e+6]Epoch 1354/3000:  45%|████▌     | 1354/3000 [13:12<11:44,  2.34it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.63e+6]Epoch 1355/3000:  45%|████▌     | 1354/3000 [13:12<11:44,  2.34it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.63e+6]Epoch 1355/3000:  45%|████▌     | 1355/3000 [13:13<11:59,  2.29it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.63e+6]Epoch 1355/3000:  45%|████▌     | 1355/3000 [13:13<11:59,  2.29it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.63e+6]Epoch 1356/3000:  45%|████▌     | 1355/3000 [13:13<11:59,  2.29it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.63e+6]Epoch 1356/3000:  45%|████▌     | 1356/3000 [13:13<12:17,  2.23it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.63e+6]Epoch 1356/3000:  45%|████▌     | 1356/3000 [13:13<12:17,  2.23it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.63e+6] Epoch 1357/3000:  45%|████▌     | 1356/3000 
[13:13<12:17,  2.23it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.63e+6]Epoch 1357/3000:  45%|████▌     | 1357/3000 [13:14<12:17,  2.23it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.63e+6]Epoch 1357/3000:  45%|████▌     | 1357/3000 [13:14<12:17,  2.23it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.63e+6]Epoch 1358/3000:  45%|████▌     | 1357/3000 [13:14<12:17,  2.23it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.63e+6]Epoch 1358/3000:  45%|████▌     | 1358/3000 [13:14<12:38,  2.16it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.63e+6]Epoch 1358/3000:  45%|████▌     | 1358/3000 [13:14<12:38,  2.16it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.63e+6]Epoch 1359/3000:  45%|████▌     | 1358/3000 [13:14<12:38,  2.16it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.63e+6]Epoch 1359/3000:  45%|████▌     | 1359/3000 [13:15<12:35,  2.17it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.63e+6]Epoch 1359/3000:  45%|████▌     | 1359/3000 [13:15<12:35,  2.17it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.63e+6]Epoch 1360/3000:  45%|████▌     | 1359/3000 [13:15<12:35,  2.17it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.63e+6]Epoch 1360/3000:  45%|████▌     | 1360/3000 [13:15<12:13,  2.23it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.63e+6]Epoch 1360/3000:  45%|████▌     | 1360/3000 [13:15<12:13,  2.23it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.63e+6]Epoch 1361/3000:  45%|████▌     | 1360/3000 [13:15<12:13,  2.23it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.63e+6]Epoch 1361/3000:  45%|████▌     | 1361/3000 [13:16<11:56,  2.29it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.63e+6]Epoch 1361/3000:  45%|████▌     | 1361/3000 [13:16<11:56,  2.29it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.63e+6]Epoch 1362/3000:  45%|████▌     | 1361/3000 [13:16<11:56,  2.29it/s, v_num=1, train_loss_step=1.65e+6, 
train_loss_epoch=1.63e+6]Epoch 1362/3000:  45%|████▌     | 1362/3000 [13:16<11:55,  2.29it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.63e+6]Epoch 1362/3000:  45%|████▌     | 1362/3000 [13:16<11:55,  2.29it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.63e+6]Epoch 1363/3000:  45%|████▌     | 1362/3000 [13:16<11:55,  2.29it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.63e+6]Epoch 1363/3000:  45%|████▌     | 1363/3000 [13:16<12:05,  2.26it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.63e+6]Epoch 1363/3000:  45%|████▌     | 1363/3000 [13:16<12:05,  2.26it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.63e+6]Epoch 1364/3000:  45%|████▌     | 1363/3000 [13:16<12:05,  2.26it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.63e+6]Epoch 1364/3000:  45%|████▌     | 1364/3000 [13:17<12:05,  2.25it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.63e+6]Epoch 1364/3000:  45%|████▌     | 1364/3000 [13:17<12:05,  2.25it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.63e+6]Epoch 1365/3000:  45%|████▌     | 1364/3000 [13:17<12:05,  2.25it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.63e+6]Epoch 1365/3000:  46%|████▌     | 1365/3000 [13:17<12:08,  2.24it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.63e+6]Epoch 1365/3000:  46%|████▌     | 1365/3000 [13:17<12:08,  2.24it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.63e+6]Epoch 1366/3000:  46%|████▌     | 1365/3000 [13:17<12:08,  2.24it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.63e+6]Epoch 1366/3000:  46%|████▌     | 1366/3000 [13:18<12:24,  2.19it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.63e+6]Epoch 1366/3000:  46%|████▌     | 1366/3000 [13:18<12:24,  2.19it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.63e+6]Epoch 1367/3000:  46%|████▌     | 1366/3000 [13:18<12:24,  2.19it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.63e+6]Epoch 1367/3000:  46%|████▌     | 1367/3000 
[13:18<12:07,  2.24it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.63e+6]Epoch 1367/3000:  46%|████▌     | 1367/3000 [13:18<12:07,  2.24it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.63e+6] Epoch 1368/3000:  46%|████▌     | 1367/3000 [13:18<12:07,  2.24it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.63e+6]Epoch 1368/3000:  46%|████▌     | 1368/3000 [13:19<11:44,  2.32it/s, v_num=1, train_loss_step=1.7e+6, train_loss_epoch=1.63e+6]Epoch 1368/3000:  46%|████▌     | 1368/3000 [13:19<11:44,  2.32it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.63e+6]Epoch 1369/3000:  46%|████▌     | 1368/3000 [13:19<11:44,  2.32it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.63e+6]Epoch 1369/3000:  46%|████▌     | 1369/3000 [13:19<12:09,  2.23it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.63e+6]Epoch 1369/3000:  46%|████▌     | 1369/3000 [13:19<12:09,  2.23it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.63e+6]Epoch 1370/3000:  46%|████▌     | 1369/3000 [13:19<12:09,  2.23it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.63e+6]Epoch 1370/3000:  46%|████▌     | 1370/3000 [13:20<11:43,  2.32it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.63e+6]Epoch 1370/3000:  46%|████▌     | 1370/3000 [13:20<11:43,  2.32it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.63e+6]Epoch 1371/3000:  46%|████▌     | 1370/3000 [13:20<11:43,  2.32it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.63e+6]Epoch 1371/3000:  46%|████▌     | 1371/3000 [13:20<11:19,  2.40it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.63e+6]Epoch 1371/3000:  46%|████▌     | 1371/3000 [13:20<11:19,  2.40it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.63e+6]Epoch 1372/3000:  46%|████▌     | 1371/3000 [13:20<11:19,  2.40it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.63e+6]Epoch 1372/3000:  46%|████▌     | 1372/3000 [13:20<11:07,  2.44it/s, v_num=1, train_loss_step=1.59e+6, 
train_loss_epoch=1.63e+6]Epoch 1372/3000:  46%|████▌     | 1372/3000 [13:20<11:07,  2.44it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.63e+6]Epoch 1373/3000:  46%|████▌     | 1372/3000 [13:20<11:07,  2.44it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.63e+6]Epoch 1373/3000:  46%|████▌     | 1373/3000 [13:21<10:58,  2.47it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.63e+6]Epoch 1373/3000:  46%|████▌     | 1373/3000 [13:21<10:58,  2.47it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.63e+6]Epoch 1374/3000:  46%|████▌     | 1373/3000 [13:21<10:58,  2.47it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.63e+6]Epoch 1374/3000:  46%|████▌     | 1374/3000 [13:21<11:21,  2.39it/s, v_num=1, train_loss_step=1.68e+6, train_loss_epoch=1.63e+6]Epoch 1374/3000:  46%|████▌     | 1374/3000 [13:21<11:21,  2.39it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.63e+6]Epoch 1375/3000:  46%|████▌     | 1374/3000 [13:21<11:21,  2.39it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.63e+6]Epoch 1375/3000:  46%|████▌     | 1375/3000 [13:22<12:02,  2.25it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.63e+6]Epoch 1375/3000:  46%|████▌     | 1375/3000 [13:22<12:02,  2.25it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.63e+6] Epoch 1376/3000:  46%|████▌     | 1375/3000 [13:22<12:02,  2.25it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.63e+6]Epoch 1376/3000:  46%|████▌     | 1376/3000 [13:22<11:55,  2.27it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.63e+6]Epoch 1376/3000:  46%|████▌     | 1376/3000 [13:22<11:55,  2.27it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.62e+6]Epoch 1377/3000:  46%|████▌     | 1376/3000 [13:22<11:55,  2.27it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.62e+6]Epoch 1377/3000:  46%|████▌     | 1377/3000 [13:23<12:26,  2.18it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.62e+6]Epoch 1377/3000:  46%|████▌     | 1377/3000 
[13:23<12:26,  2.18it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.62e+6]Epoch 1378/3000:  46%|████▌     | 1377/3000 [13:23<12:26,  2.18it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.62e+6]Epoch 1378/3000:  46%|████▌     | 1378/3000 [13:23<12:50,  2.11it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.62e+6]Epoch 1378/3000:  46%|████▌     | 1378/3000 [13:23<12:50,  2.11it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.62e+6]Epoch 1379/3000:  46%|████▌     | 1378/3000 [13:23<12:50,  2.11it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.62e+6]Epoch 1379/3000:  46%|████▌     | 1379/3000 [13:24<11:55,  2.27it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.62e+6]Epoch 1379/3000:  46%|████▌     | 1379/3000 [13:24<11:55,  2.27it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.62e+6] Epoch 1380/3000:  46%|████▌     | 1379/3000 [13:24<11:55,  2.27it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.62e+6]Epoch 1380/3000:  46%|████▌     | 1380/3000 [13:24<11:39,  2.31it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.62e+6]Epoch 1380/3000:  46%|████▌     | 1380/3000 [13:24<11:39,  2.31it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.62e+6]Epoch 1381/3000:  46%|████▌     | 1380/3000 [13:24<11:39,  2.31it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.62e+6]Epoch 1381/3000:  46%|████▌     | 1381/3000 [13:24<11:40,  2.31it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.62e+6]Epoch 1381/3000:  46%|████▌     | 1381/3000 [13:24<11:40,  2.31it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.62e+6]Epoch 1382/3000:  46%|████▌     | 1381/3000 [13:24<11:40,  2.31it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.62e+6]Epoch 1382/3000:  46%|████▌     | 1382/3000 [13:25<11:24,  2.36it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.62e+6]Epoch 1382/3000:  46%|████▌     | 1382/3000 [13:25<11:24,  2.36it/s, v_num=1, train_loss_step=1.66e+6, 
train_loss_epoch=1.62e+6]Epoch 1383/3000:  46%|████▌     | 1382/3000 [13:25<11:24,  2.36it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.62e+6]Epoch 1383/3000:  46%|████▌     | 1383/3000 [13:25<11:26,  2.35it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.62e+6]Epoch 1383/3000:  46%|████▌     | 1383/3000 [13:25<11:26,  2.35it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.62e+6]Epoch 1384/3000:  46%|████▌     | 1383/3000 [13:25<11:26,  2.35it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.62e+6]Epoch 1384/3000:  46%|████▌     | 1384/3000 [13:26<11:45,  2.29it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.62e+6]Epoch 1384/3000:  46%|████▌     | 1384/3000 [13:26<11:45,  2.29it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.62e+6]Epoch 1385/3000:  46%|████▌     | 1384/3000 [13:26<11:45,  2.29it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.62e+6]Epoch 1385/3000:  46%|████▌     | 1385/3000 [13:26<11:42,  2.30it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.62e+6]Epoch 1385/3000:  46%|████▌     | 1385/3000 [13:26<11:42,  2.30it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.62e+6]Epoch 1386/3000:  46%|████▌     | 1385/3000 [13:26<11:42,  2.30it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.62e+6]Epoch 1386/3000:  46%|████▌     | 1386/3000 [13:27<11:47,  2.28it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.62e+6]Epoch 1386/3000:  46%|████▌     | 1386/3000 [13:27<11:47,  2.28it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.62e+6]Epoch 1387/3000:  46%|████▌     | 1386/3000 [13:27<11:47,  2.28it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.62e+6]Epoch 1387/3000:  46%|████▌     | 1387/3000 [13:27<11:42,  2.30it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.62e+6]Epoch 1387/3000:  46%|████▌     | 1387/3000 [13:27<11:42,  2.30it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.62e+6]Epoch 1388/3000:  46%|████▌     | 1387/3000 
[13:27<11:42,  2.30it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.62e+6]Epoch 1388/3000:  46%|████▋     | 1388/3000 [13:27<11:10,  2.40it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.62e+6]Epoch 1388/3000:  46%|████▋     | 1388/3000 [13:27<11:10,  2.40it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.62e+6]Epoch 1389/3000:  46%|████▋     | 1388/3000 [13:27<11:10,  2.40it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.62e+6]Epoch 1389/3000:  46%|████▋     | 1389/3000 [13:28<10:48,  2.48it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.62e+6]Epoch 1389/3000:  46%|████▋     | 1389/3000 [13:28<10:48,  2.48it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.62e+6]Epoch 1390/3000:  46%|████▋     | 1389/3000 [13:28<10:48,  2.48it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.62e+6]Epoch 1390/3000:  46%|████▋     | 1390/3000 [13:28<11:22,  2.36it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.62e+6]Epoch 1390/3000:  46%|████▋     | 1390/3000 [13:28<11:22,  2.36it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.62e+6]Epoch 1391/3000:  46%|████▋     | 1390/3000 [13:28<11:22,  2.36it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.62e+6]Epoch 1391/3000:  46%|████▋     | 1391/3000 [13:29<11:44,  2.28it/s, v_num=1, train_loss_step=1.65e+6, train_loss_epoch=1.62e+6]Epoch 1391/3000:  46%|████▋     | 1391/3000 [13:29<11:44,  2.28it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.62e+6]Epoch 1392/3000:  46%|████▋     | 1391/3000 [13:29<11:44,  2.28it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.62e+6]Epoch 1392/3000:  46%|████▋     | 1392/3000 [13:29<11:20,  2.36it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.62e+6]Epoch 1392/3000:  46%|████▋     | 1392/3000 [13:29<11:20,  2.36it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.62e+6]Epoch 1393/3000:  46%|████▋     | 1392/3000 [13:29<11:20,  2.36it/s, v_num=1, train_loss_step=1.69e+6, 
train_loss_epoch=1.62e+6]Epoch 1393/3000:  46%|████▋     | 1393/3000 [13:29<11:39,  2.30it/s, v_num=1, train_loss_step=1.69e+6, train_loss_epoch=1.62e+6]Epoch 1393/3000:  46%|████▋     | 1393/3000 [13:29<11:39,  2.30it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.62e+6]Epoch 1394/3000:  46%|████▋     | 1393/3000 [13:29<11:39,  2.30it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.62e+6]Epoch 1394/3000:  46%|████▋     | 1394/3000 [13:30<11:55,  2.24it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.62e+6]Epoch 1394/3000:  46%|████▋     | 1394/3000 [13:30<11:55,  2.24it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.62e+6]Epoch 1395/3000:  46%|████▋     | 1394/3000 [13:30<11:55,  2.24it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.62e+6]Epoch 1395/3000:  46%|████▋     | 1395/3000 [13:30<12:03,  2.22it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.62e+6]Epoch 1395/3000:  46%|████▋     | 1395/3000 [13:30<12:03,  2.22it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.62e+6]Epoch 1396/3000:  46%|████▋     | 1395/3000 [13:30<12:03,  2.22it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.62e+6]Epoch 1396/3000:  47%|████▋     | 1396/3000 [13:31<11:38,  2.30it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.62e+6]Epoch 1396/3000:  47%|████▋     | 1396/3000 [13:31<11:38,  2.30it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.62e+6]Epoch 1397/3000:  47%|████▋     | 1396/3000 [13:31<11:38,  2.30it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.62e+6]Epoch 1397/3000:  47%|████▋     | 1397/3000 [13:31<11:37,  2.30it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.62e+6]Epoch 1397/3000:  47%|████▋     | 1397/3000 [13:31<11:37,  2.30it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.62e+6]Epoch 1398/3000:  47%|████▋     | 1397/3000 [13:31<11:37,  2.30it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.62e+6]Epoch 1398/3000:  47%|████▋     | 1398/3000 
[13:32<11:43,  2.28it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.62e+6]Epoch 1398/3000:  47%|████▋     | 1398/3000 [13:32<11:43,  2.28it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.62e+6]Epoch 1399/3000:  47%|████▋     | 1398/3000 [13:32<11:43,  2.28it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.62e+6]Epoch 1399/3000:  47%|████▋     | 1399/3000 [13:32<11:39,  2.29it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.62e+6]Epoch 1399/3000:  47%|████▋     | 1399/3000 [13:32<11:39,  2.29it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.62e+6]Epoch 1400/3000:  47%|████▋     | 1399/3000 [13:32<11:39,  2.29it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.62e+6]Epoch 1400/3000:  47%|████▋     | 1400/3000 [13:33<11:50,  2.25it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.62e+6]Epoch 1400/3000:  47%|████▋     | 1400/3000 [13:33<11:50,  2.25it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.62e+6]Epoch 1401/3000:  47%|████▋     | 1400/3000 [13:33<11:50,  2.25it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.62e+6]Epoch 1401/3000:  47%|████▋     | 1401/3000 [13:33<11:51,  2.25it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.62e+6]Epoch 1401/3000:  47%|████▋     | 1401/3000 [13:33<11:51,  2.25it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.62e+6] Epoch 1402/3000:  47%|████▋     | 1401/3000 [13:33<11:51,  2.25it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.62e+6]Epoch 1402/3000:  47%|████▋     | 1402/3000 [13:33<11:52,  2.24it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.62e+6]Epoch 1402/3000:  47%|████▋     | 1402/3000 [13:33<11:52,  2.24it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.62e+6]Epoch 1403/3000:  47%|████▋     | 1402/3000 [13:33<11:52,  2.24it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.62e+6]Epoch 1403/3000:  47%|████▋     | 1403/3000 [13:34<11:09,  2.38it/s, v_num=1, train_loss_step=1.63e+6, 
train_loss_epoch=1.62e+6]Epoch 1403/3000:  47%|████▋     | 1403/3000 [13:34<11:09,  2.38it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.61e+6]Epoch 1404/3000:  47%|████▋     | 1403/3000 [13:34<11:09,  2.38it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.61e+6]Epoch 1404/3000:  47%|████▋     | 1404/3000 [13:34<11:28,  2.32it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.61e+6]Epoch 1404/3000:  47%|████▋     | 1404/3000 [13:34<11:28,  2.32it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.61e+6]Epoch 1405/3000:  47%|████▋     | 1404/3000 [13:34<11:28,  2.32it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.61e+6]Epoch 1405/3000:  47%|████▋     | 1405/3000 [13:35<11:27,  2.32it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.61e+6]Epoch 1405/3000:  47%|████▋     | 1405/3000 [13:35<11:27,  2.32it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.61e+6]Epoch 1406/3000:  47%|████▋     | 1405/3000 [13:35<11:27,  2.32it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.61e+6]Epoch 1406/3000:  47%|████▋     | 1406/3000 [13:35<11:47,  2.25it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.61e+6]Epoch 1406/3000:  47%|████▋     | 1406/3000 [13:35<11:47,  2.25it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.61e+6]Epoch 1407/3000:  47%|████▋     | 1406/3000 [13:35<11:47,  2.25it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.61e+6]Epoch 1407/3000:  47%|████▋     | 1407/3000 [13:36<11:55,  2.23it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.61e+6]Epoch 1407/3000:  47%|████▋     | 1407/3000 [13:36<11:55,  2.23it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.61e+6]Epoch 1408/3000:  47%|████▋     | 1407/3000 [13:36<11:55,  2.23it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.61e+6]Epoch 1408/3000:  47%|████▋     | 1408/3000 [13:36<11:47,  2.25it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.61e+6]Epoch 1408/3000:  47%|████▋     | 1408/3000 
[13:36<11:47,  2.25it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.61e+6]Epoch 1409/3000:  47%|████▋     | 1408/3000 [13:36<11:47,  2.25it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.61e+6]Epoch 1409/3000:  47%|████▋     | 1409/3000 [13:37<11:57,  2.22it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.61e+6]Epoch 1409/3000:  47%|████▋     | 1409/3000 [13:37<11:57,  2.22it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6] Epoch 1410/3000:  47%|████▋     | 1409/3000 [13:37<11:57,  2.22it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1410/3000:  47%|████▋     | 1410/3000 [13:37<11:35,  2.29it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1410/3000:  47%|████▋     | 1410/3000 [13:37<11:35,  2.29it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.61e+6]Epoch 1411/3000:  47%|████▋     | 1410/3000 [13:37<11:35,  2.29it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.61e+6]Epoch 1411/3000:  47%|████▋     | 1411/3000 [13:37<11:13,  2.36it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.61e+6]Epoch 1411/3000:  47%|████▋     | 1411/3000 [13:37<11:13,  2.36it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.61e+6]Epoch 1412/3000:  47%|████▋     | 1411/3000 [13:37<11:13,  2.36it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.61e+6]Epoch 1412/3000:  47%|████▋     | 1412/3000 [13:38<11:05,  2.39it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.61e+6]Epoch 1412/3000:  47%|████▋     | 1412/3000 [13:38<11:05,  2.39it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.61e+6]Epoch 1413/3000:  47%|████▋     | 1412/3000 [13:38<11:05,  2.39it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.61e+6]Epoch 1413/3000:  47%|████▋     | 1413/3000 [13:38<10:42,  2.47it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.61e+6]Epoch 1413/3000:  47%|████▋     | 1413/3000 [13:38<10:42,  2.47it/s, v_num=1, train_loss_step=1.61e+6, 
train_loss_epoch=1.61e+6]Epoch 1414/3000:  47%|████▋     | 1413/3000 [13:38<10:42,  2.47it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.61e+6]Epoch 1414/3000:  47%|████▋     | 1414/3000 [13:39<10:40,  2.48it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.61e+6]Epoch 1414/3000:  47%|████▋     | 1414/3000 [13:39<10:40,  2.48it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.61e+6]Epoch 1415/3000:  47%|████▋     | 1414/3000 [13:39<10:40,  2.48it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.61e+6]Epoch 1415/3000:  47%|████▋     | 1415/3000 [13:39<10:58,  2.41it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.61e+6]Epoch 1415/3000:  47%|████▋     | 1415/3000 [13:39<10:58,  2.41it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.61e+6]Epoch 1416/3000:  47%|████▋     | 1415/3000 [13:39<10:58,  2.41it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.61e+6]Epoch 1416/3000:  47%|████▋     | 1416/3000 [13:39<10:15,  2.58it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.61e+6]Epoch 1416/3000:  47%|████▋     | 1416/3000 [13:39<10:15,  2.58it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6] Epoch 1417/3000:  47%|████▋     | 1416/3000 [13:39<10:15,  2.58it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1417/3000:  47%|████▋     | 1417/3000 [13:40<10:52,  2.43it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1417/3000:  47%|████▋     | 1417/3000 [13:40<10:52,  2.43it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1418/3000:  47%|████▋     | 1417/3000 [13:40<10:52,  2.43it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1418/3000:  47%|████▋     | 1418/3000 [13:40<11:11,  2.36it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1418/3000:  47%|████▋     | 1418/3000 [13:40<11:11,  2.36it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1419/3000:  47%|████▋     | 1418/3000 [13:40<11:11,  
2.36it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1419/3000:  47%|████▋     | 1419/3000 [13:41<11:24,  2.31it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1419/3000:  47%|████▋     | 1419/3000 [13:41<11:24,  2.31it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1420/3000:  47%|████▋     | 1419/3000 [13:41<11:24,  2.31it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1420/3000:  47%|████▋     | 1420/3000 [13:41<11:31,  2.28it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1420/3000:  47%|████▋     | 1420/3000 [13:41<11:31,  2.28it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1421/3000:  47%|████▋     | 1420/3000 [13:41<11:31,  2.28it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1421/3000:  47%|████▋     | 1421/3000 [13:42<11:43,  2.24it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1421/3000:  47%|████▋     | 1421/3000 [13:42<11:43,  2.24it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.61e+6]Epoch 1422/3000:  47%|████▋     | 1421/3000 [13:42<11:43,  2.24it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.61e+6]Epoch 1422/3000:  47%|████▋     | 1422/3000 [13:42<11:43,  2.24it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.61e+6]Epoch 1422/3000:  47%|████▋     | 1422/3000 [13:42<11:43,  2.24it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.61e+6]Epoch 1423/3000:  47%|████▋     | 1422/3000 [13:42<11:43,  2.24it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.61e+6]Epoch 1423/3000:  47%|████▋     | 1423/3000 [13:42<11:39,  2.25it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.61e+6]Epoch 1423/3000:  47%|████▋     | 1423/3000 [13:42<11:39,  2.25it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6] Epoch 1424/3000:  47%|████▋     | 1423/3000 [13:42<11:39,  2.25it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1424/3000:  
47%|████▋     | 1424/3000 [13:43<12:03,  2.18it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1424/3000:  47%|████▋     | 1424/3000 [13:43<12:03,  2.18it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1425/3000:  47%|████▋     | 1424/3000 [13:43<12:03,  2.18it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1425/3000:  48%|████▊     | 1425/3000 [13:43<11:52,  2.21it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1425/3000:  48%|████▊     | 1425/3000 [13:43<11:52,  2.21it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.61e+6]Epoch 1426/3000:  48%|████▊     | 1425/3000 [13:43<11:52,  2.21it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.61e+6]Epoch 1426/3000:  48%|████▊     | 1426/3000 [13:44<11:07,  2.36it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.61e+6]Epoch 1426/3000:  48%|████▊     | 1426/3000 [13:44<11:07,  2.36it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.61e+6]Epoch 1427/3000:  48%|████▊     | 1426/3000 [13:44<11:07,  2.36it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.61e+6]Epoch 1427/3000:  48%|████▊     | 1427/3000 [13:44<11:15,  2.33it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.61e+6]Epoch 1427/3000:  48%|████▊     | 1427/3000 [13:44<11:15,  2.33it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6] Epoch 1428/3000:  48%|████▊     | 1427/3000 [13:44<11:15,  2.33it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1428/3000:  48%|████▊     | 1428/3000 [13:45<11:16,  2.32it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.61e+6]Epoch 1428/3000:  48%|████▊     | 1428/3000 [13:45<11:16,  2.32it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.61e+6]Epoch 1429/3000:  48%|████▊     | 1428/3000 [13:45<11:16,  2.32it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.61e+6]Epoch 1429/3000:  48%|████▊     | 1429/3000 [13:45<11:48,  2.22it/s, v_num=1, train_loss_step=1.61e+6, 
train_loss_epoch=1.61e+6]Epoch 1429/3000:  48%|████▊     | 1429/3000 [13:45<11:48,  2.22it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.61e+6]Epoch 1430/3000:  48%|████▊     | 1429/3000 [13:45<11:48,  2.22it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.61e+6]Epoch 1430/3000:  48%|████▊     | 1430/3000 [13:46<11:22,  2.30it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.61e+6]Epoch 1430/3000:  48%|████▊     | 1430/3000 [13:46<11:22,  2.30it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.61e+6]Epoch 1431/3000:  48%|████▊     | 1430/3000 [13:46<11:22,  2.30it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.61e+6]Epoch 1431/3000:  48%|████▊     | 1431/3000 [13:46<11:22,  2.30it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.61e+6]Epoch 1431/3000:  48%|████▊     | 1431/3000 [13:46<11:22,  2.30it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.61e+6]Epoch 1432/3000:  48%|████▊     | 1431/3000 [13:46<11:22,  2.30it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.61e+6]Epoch 1432/3000:  48%|████▊     | 1432/3000 [13:46<11:14,  2.33it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.61e+6]Epoch 1432/3000:  48%|████▊     | 1432/3000 [13:46<11:14,  2.33it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.6e+6] Epoch 1433/3000:  48%|████▊     | 1432/3000 [13:46<11:14,  2.33it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.6e+6]Epoch 1433/3000:  48%|████▊     | 1433/3000 [13:47<11:09,  2.34it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.6e+6]Epoch 1433/3000:  48%|████▊     | 1433/3000 [13:47<11:09,  2.34it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.6e+6]Epoch 1434/3000:  48%|████▊     | 1433/3000 [13:47<11:09,  2.34it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.6e+6]Epoch 1434/3000:  48%|████▊     | 1434/3000 [13:47<11:29,  2.27it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.6e+6]Epoch 1434/3000:  48%|████▊     | 1434/3000 [13:47<11:29,  
2.27it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.6e+6]Epoch 1435/3000:  48%|████▊     | 1434/3000 [13:47<11:29,  2.27it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.6e+6]Epoch 1435/3000:  48%|████▊     | 1435/3000 [13:48<11:47,  2.21it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.6e+6]Epoch 1435/3000:  48%|████▊     | 1435/3000 [13:48<11:47,  2.21it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.6e+6]Epoch 1436/3000:  48%|████▊     | 1435/3000 [13:48<11:47,  2.21it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.6e+6]Epoch 1436/3000:  48%|████▊     | 1436/3000 [13:48<11:26,  2.28it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.6e+6]Epoch 1436/3000:  48%|████▊     | 1436/3000 [13:48<11:26,  2.28it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.6e+6]Epoch 1437/3000:  48%|████▊     | 1436/3000 [13:48<11:26,  2.28it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.6e+6]Epoch 1437/3000:  48%|████▊     | 1437/3000 [13:49<11:07,  2.34it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.6e+6]Epoch 1437/3000:  48%|████▊     | 1437/3000 [13:49<11:07,  2.34it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.6e+6]Epoch 1438/3000:  48%|████▊     | 1437/3000 [13:49<11:07,  2.34it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.6e+6]Epoch 1438/3000:  48%|████▊     | 1438/3000 [13:49<10:58,  2.37it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.6e+6]Epoch 1438/3000:  48%|████▊     | 1438/3000 [13:49<10:58,  2.37it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.6e+6]Epoch 1439/3000:  48%|████▊     | 1438/3000 [13:49<10:58,  2.37it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.6e+6]Epoch 1439/3000:  48%|████▊     | 1439/3000 [13:49<11:11,  2.33it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.6e+6]Epoch 1439/3000:  48%|████▊     | 1439/3000 [13:49<11:11,  2.33it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.6e+6]Epoch 1440/3000:  
48%|████▊     | 1439/3000 [13:49<11:11,  2.33it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.6e+6]Epoch 1440/3000:  48%|████▊     | 1440/3000 [13:50<10:23,  2.50it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.6e+6]Epoch 1440/3000:  48%|████▊     | 1440/3000 [13:50<10:23,  2.50it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.6e+6]Epoch 1441/3000:  48%|████▊     | 1440/3000 [13:50<10:23,  2.50it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.6e+6]Epoch 1441/3000:  48%|████▊     | 1441/3000 [13:50<10:42,  2.42it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.6e+6]Epoch 1441/3000:  48%|████▊     | 1441/3000 [13:50<10:42,  2.42it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.6e+6]Epoch 1442/3000:  48%|████▊     | 1441/3000 [13:50<10:42,  2.42it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.6e+6]Epoch 1442/3000:  48%|████▊     | 1442/3000 [13:51<10:47,  2.41it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.6e+6]Epoch 1442/3000:  48%|████▊     | 1442/3000 [13:51<10:47,  2.41it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.6e+6] Epoch 1443/3000:  48%|████▊     | 1442/3000 [13:51<10:47,  2.41it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.6e+6]Epoch 1443/3000:  48%|████▊     | 1443/3000 [13:51<11:24,  2.28it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.6e+6]Epoch 1443/3000:  48%|████▊     | 1443/3000 [13:51<11:24,  2.28it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.6e+6]Epoch 1444/3000:  48%|████▊     | 1443/3000 [13:51<11:24,  2.28it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.6e+6]Epoch 1444/3000:  48%|████▊     | 1444/3000 [13:51<10:39,  2.43it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.6e+6]Epoch 1444/3000:  48%|████▊     | 1444/3000 [13:51<10:39,  2.43it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.6e+6]Epoch 1445/3000:  48%|████▊     | 1444/3000 [13:51<10:39,  2.43it/s, v_num=1, train_loss_step=1.59e+6, 
train_loss_epoch=1.6e+6]Epoch 1445/3000:  48%|████▊     | 1445/3000 [13:52<09:55,  2.61it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.6e+6]Epoch 1445/3000:  48%|████▊     | 1445/3000 [13:52<09:55,  2.61it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.6e+6]Epoch 1446/3000:  48%|████▊     | 1445/3000 [13:52<09:55,  2.61it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.6e+6]Epoch 1446/3000:  48%|████▊     | 1446/3000 [13:52<10:32,  2.46it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.6e+6]Epoch 1446/3000:  48%|████▊     | 1446/3000 [13:52<10:32,  2.46it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.6e+6]Epoch 1447/3000:  48%|████▊     | 1446/3000 [13:52<10:32,  2.46it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.6e+6]Epoch 1447/3000:  48%|████▊     | 1447/3000 [13:53<10:58,  2.36it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.6e+6]Epoch 1447/3000:  48%|████▊     | 1447/3000 [13:53<10:58,  2.36it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.6e+6]Epoch 1448/3000:  48%|████▊     | 1447/3000 [13:53<10:58,  2.36it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.6e+6]Epoch 1448/3000:  48%|████▊     | 1448/3000 [13:53<11:06,  2.33it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.6e+6]Epoch 1448/3000:  48%|████▊     | 1448/3000 [13:53<11:06,  2.33it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.6e+6]Epoch 1449/3000:  48%|████▊     | 1448/3000 [13:53<11:06,  2.33it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.6e+6]Epoch 1449/3000:  48%|████▊     | 1449/3000 [13:54<11:04,  2.33it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.6e+6]Epoch 1449/3000:  48%|████▊     | 1449/3000 [13:54<11:04,  2.33it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.6e+6]Epoch 1450/3000:  48%|████▊     | 1449/3000 [13:54<11:04,  2.33it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.6e+6]Epoch 1450/3000:  48%|████▊     | 1450/3000 [13:54<11:12,  2.31it/s, 
v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.6e+6]Epoch 1450/3000:  48%|████▊     | 1450/3000 [13:54<11:12,  2.31it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.6e+6]Epoch 1451/3000:  48%|████▊     | 1450/3000 [13:54<11:12,  2.31it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.6e+6]Epoch 1451/3000:  48%|████▊     | 1451/3000 [13:54<11:03,  2.34it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.6e+6]Epoch 1451/3000:  48%|████▊     | 1451/3000 [13:54<11:03,  2.34it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.6e+6]Epoch 1452/3000:  48%|████▊     | 1451/3000 [13:54<11:03,  2.34it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.6e+6]Epoch 1452/3000:  48%|████▊     | 1452/3000 [13:55<11:01,  2.34it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.6e+6]Epoch 1452/3000:  48%|████▊     | 1452/3000 [13:55<11:01,  2.34it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.6e+6] Epoch 1453/3000:  48%|████▊     | 1452/3000 [13:55<11:01,  2.34it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.6e+6]Epoch 1453/3000:  48%|████▊     | 1453/3000 [13:55<11:26,  2.25it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.6e+6]Epoch 1453/3000:  48%|████▊     | 1453/3000 [13:55<11:26,  2.25it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.6e+6]Epoch 1454/3000:  48%|████▊     | 1453/3000 [13:55<11:26,  2.25it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.6e+6]Epoch 1454/3000:  48%|████▊     | 1454/3000 [13:56<11:50,  2.18it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.6e+6]Epoch 1454/3000:  48%|████▊     | 1454/3000 [13:56<11:50,  2.18it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.6e+6]Epoch 1455/3000:  48%|████▊     | 1454/3000 [13:56<11:50,  2.18it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.6e+6]Epoch 1455/3000:  48%|████▊     | 1455/3000 [13:56<11:58,  2.15it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.6e+6]Epoch 1455/3000:  48%|████▊     | 
1455/3000 [13:56<11:58,  2.15it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.6e+6]Epoch 1456/3000:  48%|████▊     | 1455/3000 [13:56<11:58,  2.15it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.6e+6]Epoch 1456/3000:  49%|████▊     | 1456/3000 [13:57<12:01,  2.14it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.6e+6]Epoch 1456/3000:  49%|████▊     | 1456/3000 [13:57<12:01,  2.14it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.6e+6]Epoch 1457/3000:  49%|████▊     | 1456/3000 [13:57<12:01,  2.14it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.6e+6]Epoch 1457/3000:  49%|████▊     | 1457/3000 [13:57<11:28,  2.24it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.6e+6]Epoch 1457/3000:  49%|████▊     | 1457/3000 [13:57<11:28,  2.24it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.6e+6] Epoch 1458/3000:  49%|████▊     | 1457/3000 [13:57<11:28,  2.24it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.6e+6]Epoch 1458/3000:  49%|████▊     | 1458/3000 [13:58<11:41,  2.20it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.6e+6]Epoch 1458/3000:  49%|████▊     | 1458/3000 [13:58<11:41,  2.20it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.6e+6]Epoch 1459/3000:  49%|████▊     | 1458/3000 [13:58<11:41,  2.20it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.6e+6]Epoch 1459/3000:  49%|████▊     | 1459/3000 [13:58<11:40,  2.20it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.6e+6]Epoch 1459/3000:  49%|████▊     | 1459/3000 [13:58<11:40,  2.20it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.6e+6]Epoch 1460/3000:  49%|████▊     | 1459/3000 [13:58<11:40,  2.20it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.6e+6]Epoch 1460/3000:  49%|████▊     | 1460/3000 [13:59<11:17,  2.27it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.6e+6]Epoch 1460/3000:  49%|████▊     | 1460/3000 [13:59<11:17,  2.27it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.6e+6]Epoch 
1461/3000:  49%|████▊     | 1460/3000 [13:59<11:17,  2.27it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.6e+6]Epoch 1461/3000:  49%|████▊     | 1461/3000 [13:59<11:20,  2.26it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.6e+6]Epoch 1461/3000:  49%|████▊     | 1461/3000 [13:59<11:20,  2.26it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.6e+6]Epoch 1462/3000:  49%|████▊     | 1461/3000 [13:59<11:20,  2.26it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.6e+6]Epoch 1462/3000:  49%|████▊     | 1462/3000 [13:59<11:02,  2.32it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.6e+6]Epoch 1462/3000:  49%|████▊     | 1462/3000 [13:59<11:02,  2.32it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.6e+6]Epoch 1463/3000:  49%|████▊     | 1462/3000 [13:59<11:02,  2.32it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.6e+6]Epoch 1463/3000:  49%|████▉     | 1463/3000 [14:00<10:58,  2.34it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.6e+6]Epoch 1463/3000:  49%|████▉     | 1463/3000 [14:00<10:58,  2.34it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.59e+6]Epoch 1464/3000:  49%|████▉     | 1463/3000 [14:00<10:58,  2.34it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.59e+6]Epoch 1464/3000:  49%|████▉     | 1464/3000 [14:00<10:47,  2.37it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.59e+6]Epoch 1464/3000:  49%|████▉     | 1464/3000 [14:00<10:47,  2.37it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.59e+6] Epoch 1465/3000:  49%|████▉     | 1464/3000 [14:00<10:47,  2.37it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.59e+6]Epoch 1465/3000:  49%|████▉     | 1465/3000 [14:01<11:06,  2.30it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.59e+6]Epoch 1465/3000:  49%|████▉     | 1465/3000 [14:01<11:06,  2.30it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.59e+6]Epoch 1466/3000:  49%|████▉     | 1465/3000 [14:01<11:06,  2.30it/s, v_num=1, 
train_loss_step=1.58e+6, train_loss_epoch=1.59e+6]Epoch 1466/3000:  49%|████▉     | 1466/3000 [14:01<11:26,  2.23it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.59e+6]Epoch 1466/3000:  49%|████▉     | 1466/3000 [14:01<11:26,  2.23it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.59e+6]Epoch 1467/3000:  49%|████▉     | 1466/3000 [14:01<11:26,  2.23it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.59e+6]Epoch 1467/3000:  49%|████▉     | 1467/3000 [14:02<11:21,  2.25it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.59e+6]Epoch 1467/3000:  49%|████▉     | 1467/3000 [14:02<11:21,  2.25it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.59e+6]Epoch 1468/3000:  49%|████▉     | 1467/3000 [14:02<11:21,  2.25it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.59e+6]Epoch 1468/3000:  49%|████▉     | 1468/3000 [14:02<10:56,  2.34it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.59e+6]Epoch 1468/3000:  49%|████▉     | 1468/3000 [14:02<10:56,  2.34it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.59e+6] Epoch 1469/3000:  49%|████▉     | 1468/3000 [14:02<10:56,  2.34it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.59e+6]Epoch 1469/3000:  49%|████▉     | 1469/3000 [14:02<11:08,  2.29it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.59e+6]Epoch 1469/3000:  49%|████▉     | 1469/3000 [14:02<11:08,  2.29it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.59e+6]Epoch 1470/3000:  49%|████▉     | 1469/3000 [14:02<11:08,  2.29it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.59e+6]Epoch 1470/3000:  49%|████▉     | 1470/3000 [14:03<10:55,  2.34it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.59e+6]Epoch 1470/3000:  49%|████▉     | 1470/3000 [14:03<10:55,  2.34it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.59e+6]Epoch 1471/3000:  49%|████▉     | 1470/3000 [14:03<10:55,  2.34it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.59e+6]Epoch 1471/3000:  49%|████▉     
| 1471/3000 [14:03<10:18,  2.47it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.59e+6]Epoch 1471/3000:  49%|████▉     | 1471/3000 [14:03<10:18,  2.47it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.59e+6]Epoch 1472/3000:  49%|████▉     | 1471/3000 [14:03<10:18,  2.47it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.59e+6]Epoch 1472/3000:  49%|████▉     | 1472/3000 [14:04<10:04,  2.53it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.59e+6]Epoch 1472/3000:  49%|████▉     | 1472/3000 [14:04<10:04,  2.53it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.59e+6]Epoch 1473/3000:  49%|████▉     | 1472/3000 [14:04<10:04,  2.53it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.59e+6]Epoch 1473/3000:  49%|████▉     | 1473/3000 [14:04<10:42,  2.38it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.59e+6]Epoch 1473/3000:  49%|████▉     | 1473/3000 [14:04<10:42,  2.38it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.59e+6]Epoch 1474/3000:  49%|████▉     | 1473/3000 [14:04<10:42,  2.38it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.59e+6]Epoch 1474/3000:  49%|████▉     | 1474/3000 [14:05<11:10,  2.27it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.59e+6]Epoch 1474/3000:  49%|████▉     | 1474/3000 [14:05<11:10,  2.27it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.59e+6]Epoch 1475/3000:  49%|████▉     | 1474/3000 [14:05<11:10,  2.27it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.59e+6]Epoch 1475/3000:  49%|████▉     | 1475/3000 [14:05<11:22,  2.23it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.59e+6]Epoch 1475/3000:  49%|████▉     | 1475/3000 [14:05<11:22,  2.23it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.59e+6]Epoch 1476/3000:  49%|████▉     | 1475/3000 [14:05<11:22,  2.23it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.59e+6]Epoch 1476/3000:  49%|████▉     | 1476/3000 [14:05<11:11,  2.27it/s, v_num=1, train_loss_step=1.59e+6, 
train_loss_epoch=1.59e+6]Epoch 1476/3000:  49%|████▉     | 1476/3000 [14:05<11:11,  2.27it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.59e+6]Epoch 1477/3000:  49%|████▉     | 1476/3000 [14:05<11:11,  2.27it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.59e+6]Epoch 1477/3000:  49%|████▉     | 1477/3000 [14:06<11:13,  2.26it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.59e+6]Epoch 1477/3000:  49%|████▉     | 1477/3000 [14:06<11:13,  2.26it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.59e+6]Epoch 1478/3000:  49%|████▉     | 1477/3000 [14:06<11:13,  2.26it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.59e+6]Epoch 1478/3000:  49%|████▉     | 1478/3000 [14:06<11:17,  2.25it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.59e+6]Epoch 1478/3000:  49%|████▉     | 1478/3000 [14:06<11:17,  2.25it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.59e+6]Epoch 1479/3000:  49%|████▉     | 1478/3000 [14:06<11:17,  2.25it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.59e+6]Epoch 1479/3000:  49%|████▉     | 1479/3000 [14:07<11:46,  2.15it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.59e+6]Epoch 1479/3000:  49%|████▉     | 1479/3000 [14:07<11:46,  2.15it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.59e+6]Epoch 1480/3000:  49%|████▉     | 1479/3000 [14:07<11:46,  2.15it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.59e+6]Epoch 1480/3000:  49%|████▉     | 1480/3000 [14:07<11:21,  2.23it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.59e+6]Epoch 1480/3000:  49%|████▉     | 1480/3000 [14:07<11:21,  2.23it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.59e+6]Epoch 1481/3000:  49%|████▉     | 1480/3000 [14:07<11:21,  2.23it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.59e+6]Epoch 1481/3000:  49%|████▉     | 1481/3000 [14:08<11:39,  2.17it/s, v_num=1, train_loss_step=1.66e+6, train_loss_epoch=1.59e+6]Epoch 1481/3000:  49%|████▉     | 1481/3000 
[14:08<11:39,  2.17it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.59e+6]Epoch 1482/3000:  49%|████▉     | 1481/3000 [14:08<11:39,  2.17it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.59e+6]Epoch 1482/3000:  49%|████▉     | 1482/3000 [14:08<11:16,  2.25it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.59e+6]Epoch 1482/3000:  49%|████▉     | 1482/3000 [14:08<11:16,  2.25it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.59e+6] Epoch 1483/3000:  49%|████▉     | 1482/3000 [14:08<11:16,  2.25it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.59e+6]Epoch 1483/3000:  49%|████▉     | 1483/3000 [14:09<10:54,  2.32it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.59e+6]Epoch 1483/3000:  49%|████▉     | 1483/3000 [14:09<10:54,  2.32it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.59e+6]Epoch 1484/3000:  49%|████▉     | 1483/3000 [14:09<10:54,  2.32it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.59e+6]Epoch 1484/3000:  49%|████▉     | 1484/3000 [14:09<11:01,  2.29it/s, v_num=1, train_loss_step=1.67e+6, train_loss_epoch=1.59e+6]Epoch 1484/3000:  49%|████▉     | 1484/3000 [14:09<11:01,  2.29it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.59e+6]Epoch 1485/3000:  49%|████▉     | 1484/3000 [14:09<11:01,  2.29it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.59e+6]Epoch 1485/3000:  50%|████▉     | 1485/3000 [14:09<11:12,  2.25it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.59e+6]Epoch 1485/3000:  50%|████▉     | 1485/3000 [14:09<11:12,  2.25it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.59e+6] Epoch 1486/3000:  50%|████▉     | 1485/3000 [14:09<11:12,  2.25it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.59e+6]Epoch 1486/3000:  50%|████▉     | 1486/3000 [14:10<10:26,  2.42it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.59e+6]Epoch 1486/3000:  50%|████▉     | 1486/3000 [14:10<10:26,  2.42it/s, v_num=1, train_loss_step=1.55e+6, 
train_loss_epoch=1.59e+6]Epoch 1487/3000:  50%|████▉     | 1486/3000 [14:10<10:26,  2.42it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.59e+6]Epoch 1487/3000:  50%|████▉     | 1487/3000 [14:10<10:35,  2.38it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.59e+6]Epoch 1487/3000:  50%|████▉     | 1487/3000 [14:10<10:35,  2.38it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.59e+6]Epoch 1488/3000:  50%|████▉     | 1487/3000 [14:10<10:35,  2.38it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.59e+6]Epoch 1488/3000:  50%|████▉     | 1488/3000 [14:11<10:49,  2.33it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.59e+6]Epoch 1488/3000:  50%|████▉     | 1488/3000 [14:11<10:49,  2.33it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.59e+6] Epoch 1489/3000:  50%|████▉     | 1488/3000 [14:11<10:49,  2.33it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.59e+6]Epoch 1489/3000:  50%|████▉     | 1489/3000 [14:11<10:49,  2.33it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.59e+6]Epoch 1489/3000:  50%|████▉     | 1489/3000 [14:11<10:49,  2.33it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.59e+6]Epoch 1490/3000:  50%|████▉     | 1489/3000 [14:11<10:49,  2.33it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.59e+6]Epoch 1490/3000:  50%|████▉     | 1490/3000 [14:12<11:00,  2.29it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.59e+6]Epoch 1490/3000:  50%|████▉     | 1490/3000 [14:12<11:00,  2.29it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.59e+6]Epoch 1491/3000:  50%|████▉     | 1490/3000 [14:12<11:00,  2.29it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.59e+6]Epoch 1491/3000:  50%|████▉     | 1491/3000 [14:12<11:29,  2.19it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.59e+6]Epoch 1491/3000:  50%|████▉     | 1491/3000 [14:12<11:29,  2.19it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.59e+6]Epoch 1492/3000:  50%|████▉     | 1491/3000 [14:12<11:29,  
2.19it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.59e+6]Epoch 1492/3000:  50%|████▉     | 1492/3000 [14:12<11:09,  2.25it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.59e+6]Epoch 1492/3000:  50%|████▉     | 1492/3000 [14:12<11:09,  2.25it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.59e+6]Epoch 1493/3000:  50%|████▉     | 1492/3000 [14:12<11:09,  2.25it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.59e+6]Epoch 1493/3000:  50%|████▉     | 1493/3000 [14:13<10:48,  2.32it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.59e+6]Epoch 1493/3000:  50%|████▉     | 1493/3000 [14:13<10:48,  2.32it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.59e+6]Epoch 1494/3000:  50%|████▉     | 1493/3000 [14:13<10:48,  2.32it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.59e+6]Epoch 1494/3000:  50%|████▉     | 1494/3000 [14:13<10:48,  2.32it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.59e+6]Epoch 1494/3000:  50%|████▉     | 1494/3000 [14:13<10:48,  2.32it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.59e+6]Epoch 1495/3000:  50%|████▉     | 1494/3000 [14:13<10:48,  2.32it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.59e+6]Epoch 1495/3000:  50%|████▉     | 1495/3000 [14:14<10:48,  2.32it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.59e+6]Epoch 1495/3000:  50%|████▉     | 1495/3000 [14:14<10:48,  2.32it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.58e+6] Epoch 1496/3000:  50%|████▉     | 1495/3000 [14:14<10:48,  2.32it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.58e+6]Epoch 1496/3000:  50%|████▉     | 1496/3000 [14:14<10:42,  2.34it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.58e+6]Epoch 1496/3000:  50%|████▉     | 1496/3000 [14:14<10:42,  2.34it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.58e+6]Epoch 1497/3000:  50%|████▉     | 1496/3000 [14:14<10:42,  2.34it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.58e+6]Epoch 
1497/3000:  50%|████▉     | 1497/3000 [14:15<10:59,  2.28it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.58e+6]Epoch 1497/3000:  50%|████▉     | 1497/3000 [14:15<10:59,  2.28it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.58e+6]Epoch 1498/3000:  50%|████▉     | 1497/3000 [14:15<10:59,  2.28it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.58e+6]Epoch 1498/3000:  50%|████▉     | 1498/3000 [14:15<10:55,  2.29it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.58e+6]Epoch 1498/3000:  50%|████▉     | 1498/3000 [14:15<10:55,  2.29it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.58e+6]Epoch 1499/3000:  50%|████▉     | 1498/3000 [14:15<10:55,  2.29it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.58e+6]Epoch 1499/3000:  50%|████▉     | 1499/3000 [14:16<11:08,  2.25it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.58e+6]Epoch 1499/3000:  50%|████▉     | 1499/3000 [14:16<11:08,  2.25it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.58e+6]Epoch 1500/3000:  50%|████▉     | 1499/3000 [14:16<11:08,  2.25it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.58e+6]Epoch 1500/3000:  50%|█████     | 1500/3000 [14:16<10:48,  2.31it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.58e+6]Epoch 1500/3000:  50%|█████     | 1500/3000 [14:16<10:48,  2.31it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.58e+6]Epoch 1501/3000:  50%|█████     | 1500/3000 [14:16<10:48,  2.31it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.58e+6]Epoch 1501/3000:  50%|█████     | 1501/3000 [14:16<10:52,  2.30it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.58e+6]Epoch 1501/3000:  50%|█████     | 1501/3000 [14:16<10:52,  2.30it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.58e+6] Epoch 1502/3000:  50%|█████     | 1501/3000 [14:16<10:52,  2.30it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.58e+6]Epoch 1502/3000:  50%|█████     | 1502/3000 [14:17<10:59,  2.27it/s, v_num=1, 
train_loss_step=1.6e+6, train_loss_epoch=1.58e+6]Epoch 1502/3000:  50%|█████     | 1502/3000 [14:17<10:59,  2.27it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.58e+6]Epoch 1503/3000:  50%|█████     | 1502/3000 [14:17<10:59,  2.27it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.58e+6]Epoch 1503/3000:  50%|█████     | 1503/3000 [14:17<10:43,  2.33it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.58e+6]Epoch 1503/3000:  50%|█████     | 1503/3000 [14:17<10:43,  2.33it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.58e+6]Epoch 1504/3000:  50%|█████     | 1503/3000 [14:17<10:43,  2.33it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.58e+6]Epoch 1504/3000:  50%|█████     | 1504/3000 [14:18<11:02,  2.26it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.58e+6]Epoch 1504/3000:  50%|█████     | 1504/3000 [14:18<11:02,  2.26it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.58e+6]Epoch 1505/3000:  50%|█████     | 1504/3000 [14:18<11:02,  2.26it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.58e+6]Epoch 1505/3000:  50%|█████     | 1505/3000 [14:18<10:40,  2.33it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.58e+6]Epoch 1505/3000:  50%|█████     | 1505/3000 [14:18<10:40,  2.33it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.58e+6]Epoch 1506/3000:  50%|█████     | 1505/3000 [14:18<10:40,  2.33it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.58e+6]Epoch 1506/3000:  50%|█████     | 1506/3000 [14:19<10:51,  2.29it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.58e+6]Epoch 1506/3000:  50%|█████     | 1506/3000 [14:19<10:51,  2.29it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.58e+6]Epoch 1507/3000:  50%|█████     | 1506/3000 [14:19<10:51,  2.29it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.58e+6]Epoch 1507/3000:  50%|█████     | 1507/3000 [14:19<10:36,  2.34it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.58e+6]Epoch 1507/3000:  50%|█████    
 | 1507/3000 [14:19<10:36,  2.34it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.58e+6]Epoch 1508/3000:  50%|█████     | 1507/3000 [14:19<10:36,  2.34it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.58e+6]Epoch 1508/3000:  50%|█████     | 1508/3000 [14:19<10:08,  2.45it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.58e+6]Epoch 1508/3000:  50%|█████     | 1508/3000 [14:19<10:08,  2.45it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.58e+6]Epoch 1509/3000:  50%|█████     | 1508/3000 [14:19<10:08,  2.45it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.58e+6]Epoch 1509/3000:  50%|█████     | 1509/3000 [14:20<10:38,  2.33it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.58e+6]Epoch 1509/3000:  50%|█████     | 1509/3000 [14:20<10:38,  2.33it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.58e+6]Epoch 1510/3000:  50%|█████     | 1509/3000 [14:20<10:38,  2.33it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.58e+6]Epoch 1510/3000:  50%|█████     | 1510/3000 [14:20<10:49,  2.30it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.58e+6]Epoch 1510/3000:  50%|█████     | 1510/3000 [14:20<10:49,  2.30it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.58e+6]Epoch 1511/3000:  50%|█████     | 1510/3000 [14:20<10:49,  2.30it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.58e+6]Epoch 1511/3000:  50%|█████     | 1511/3000 [14:21<10:50,  2.29it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.58e+6]Epoch 1511/3000:  50%|█████     | 1511/3000 [14:21<10:50,  2.29it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.58e+6]Epoch 1512/3000:  50%|█████     | 1511/3000 [14:21<10:50,  2.29it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.58e+6]Epoch 1512/3000:  50%|█████     | 1512/3000 [14:21<10:28,  2.37it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.58e+6]Epoch 1512/3000:  50%|█████     | 1512/3000 [14:21<10:28,  2.37it/s, v_num=1, train_loss_step=1.62e+6, 
train_loss_epoch=1.58e+6]Epoch 1513/3000:  50%|█████     | 1512/3000 [14:21<10:28,  2.37it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.58e+6]Epoch 1513/3000:  50%|█████     | 1513/3000 [14:22<10:51,  2.28it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.58e+6]Epoch 1513/3000:  50%|█████     | 1513/3000 [14:22<10:51,  2.28it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.58e+6] Epoch 1514/3000:  50%|█████     | 1513/3000 [14:22<10:51,  2.28it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.58e+6]Epoch 1514/3000:  50%|█████     | 1514/3000 [14:22<11:07,  2.22it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.58e+6]Epoch 1514/3000:  50%|█████     | 1514/3000 [14:22<11:07,  2.22it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.58e+6]Epoch 1515/3000:  50%|█████     | 1514/3000 [14:22<11:07,  2.22it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.58e+6]Epoch 1515/3000:  50%|█████     | 1515/3000 [14:22<11:04,  2.23it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.58e+6]Epoch 1515/3000:  50%|█████     | 1515/3000 [14:22<11:04,  2.23it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.58e+6]Epoch 1516/3000:  50%|█████     | 1515/3000 [14:22<11:04,  2.23it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.58e+6]Epoch 1516/3000:  51%|█████     | 1516/3000 [14:23<11:17,  2.19it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.58e+6]Epoch 1516/3000:  51%|█████     | 1516/3000 [14:23<11:17,  2.19it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.58e+6]Epoch 1517/3000:  51%|█████     | 1516/3000 [14:23<11:17,  2.19it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.58e+6]Epoch 1517/3000:  51%|█████     | 1517/3000 [14:23<11:07,  2.22it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.58e+6]Epoch 1517/3000:  51%|█████     | 1517/3000 [14:23<11:07,  2.22it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.58e+6]Epoch 1518/3000:  51%|█████     | 1517/3000 
[14:23<11:07,  2.22it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.58e+6]Epoch 1518/3000:  51%|█████     | 1518/3000 [14:24<10:49,  2.28it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.58e+6]Epoch 1518/3000:  51%|█████     | 1518/3000 [14:24<10:49,  2.28it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.58e+6] Epoch 1519/3000:  51%|█████     | 1518/3000 [14:24<10:49,  2.28it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.58e+6]Epoch 1519/3000:  51%|█████     | 1519/3000 [14:24<10:30,  2.35it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.58e+6]Epoch 1519/3000:  51%|█████     | 1519/3000 [14:24<10:30,  2.35it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.58e+6]Epoch 1520/3000:  51%|█████     | 1519/3000 [14:24<10:30,  2.35it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.58e+6]Epoch 1520/3000:  51%|█████     | 1520/3000 [14:25<10:16,  2.40it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.58e+6]Epoch 1520/3000:  51%|█████     | 1520/3000 [14:25<10:16,  2.40it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.58e+6]Epoch 1521/3000:  51%|█████     | 1520/3000 [14:25<10:16,  2.40it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.58e+6]Epoch 1521/3000:  51%|█████     | 1521/3000 [14:25<10:38,  2.32it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.58e+6]Epoch 1521/3000:  51%|█████     | 1521/3000 [14:25<10:38,  2.32it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.58e+6]Epoch 1522/3000:  51%|█████     | 1521/3000 [14:25<10:38,  2.32it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.58e+6]Epoch 1522/3000:  51%|█████     | 1522/3000 [14:26<10:57,  2.25it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.58e+6]Epoch 1522/3000:  51%|█████     | 1522/3000 [14:26<10:57,  2.25it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.58e+6]Epoch 1523/3000:  51%|█████     | 1522/3000 [14:26<10:57,  2.25it/s, v_num=1, train_loss_step=1.57e+6, 
train_loss_epoch=1.58e+6]Epoch 1523/3000:  51%|█████     | 1523/3000 [14:26<11:04,  2.22it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.58e+6]Epoch 1523/3000:  51%|█████     | 1523/3000 [14:26<11:04,  2.22it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.58e+6]Epoch 1524/3000:  51%|█████     | 1523/3000 [14:26<11:04,  2.22it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.58e+6]Epoch 1524/3000:  51%|█████     | 1524/3000 [14:26<10:55,  2.25it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.58e+6]Epoch 1524/3000:  51%|█████     | 1524/3000 [14:26<10:55,  2.25it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.58e+6]Epoch 1525/3000:  51%|█████     | 1524/3000 [14:26<10:55,  2.25it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.58e+6]Epoch 1525/3000:  51%|█████     | 1525/3000 [14:27<11:02,  2.23it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.58e+6]Epoch 1525/3000:  51%|█████     | 1525/3000 [14:27<11:02,  2.23it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.58e+6]Epoch 1526/3000:  51%|█████     | 1525/3000 [14:27<11:02,  2.23it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.58e+6]Epoch 1526/3000:  51%|█████     | 1526/3000 [14:27<11:08,  2.20it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.58e+6]Epoch 1526/3000:  51%|█████     | 1526/3000 [14:27<11:08,  2.20it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.58e+6]Epoch 1527/3000:  51%|█████     | 1526/3000 [14:27<11:08,  2.20it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.58e+6]Epoch 1527/3000:  51%|█████     | 1527/3000 [14:28<11:16,  2.18it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.58e+6]Epoch 1527/3000:  51%|█████     | 1527/3000 [14:28<11:16,  2.18it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.58e+6]Epoch 1528/3000:  51%|█████     | 1527/3000 [14:28<11:16,  2.18it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.58e+6]Epoch 1528/3000:  51%|█████     | 1528/3000 
[14:28<11:48,  2.08it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.58e+6]Epoch 1528/3000:  51%|█████     | 1528/3000 [14:28<11:48,  2.08it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.58e+6]Epoch 1529/3000:  51%|█████     | 1528/3000 [14:28<11:48,  2.08it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.58e+6]Epoch 1529/3000:  51%|█████     | 1529/3000 [14:29<11:31,  2.13it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.58e+6]Epoch 1529/3000:  51%|█████     | 1529/3000 [14:29<11:31,  2.13it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.58e+6]Epoch 1530/3000:  51%|█████     | 1529/3000 [14:29<11:31,  2.13it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.58e+6]Epoch 1530/3000:  51%|█████     | 1530/3000 [14:29<11:26,  2.14it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.58e+6]Epoch 1530/3000:  51%|█████     | 1530/3000 [14:29<11:26,  2.14it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.57e+6]Epoch 1531/3000:  51%|█████     | 1530/3000 [14:29<11:26,  2.14it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.57e+6]Epoch 1531/3000:  51%|█████     | 1531/3000 [14:30<11:24,  2.15it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.57e+6]Epoch 1531/3000:  51%|█████     | 1531/3000 [14:30<11:24,  2.15it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.57e+6]Epoch 1532/3000:  51%|█████     | 1531/3000 [14:30<11:24,  2.15it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.57e+6]Epoch 1532/3000:  51%|█████     | 1532/3000 [14:30<11:05,  2.21it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.57e+6]Epoch 1532/3000:  51%|█████     | 1532/3000 [14:30<11:05,  2.21it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.57e+6]Epoch 1533/3000:  51%|█████     | 1532/3000 [14:30<11:05,  2.21it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.57e+6]Epoch 1533/3000:  51%|█████     | 1533/3000 [14:31<11:04,  2.21it/s, v_num=1, train_loss_step=1.59e+6, 
train_loss_epoch=1.57e+6]Epoch 1533/3000:  51%|█████     | 1533/3000 [14:31<11:04,  2.21it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1534/3000:  51%|█████     | 1533/3000 [14:31<11:04,  2.21it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1534/3000:  51%|█████     | 1534/3000 [14:31<10:27,  2.34it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1534/3000:  51%|█████     | 1534/3000 [14:31<10:27,  2.34it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.57e+6]Epoch 1535/3000:  51%|█████     | 1534/3000 [14:31<10:27,  2.34it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.57e+6]Epoch 1535/3000:  51%|█████     | 1535/3000 [14:31<10:31,  2.32it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.57e+6]Epoch 1535/3000:  51%|█████     | 1535/3000 [14:31<10:31,  2.32it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.57e+6]Epoch 1536/3000:  51%|█████     | 1535/3000 [14:31<10:31,  2.32it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.57e+6]Epoch 1536/3000:  51%|█████     | 1536/3000 [14:32<10:31,  2.32it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.57e+6]Epoch 1536/3000:  51%|█████     | 1536/3000 [14:32<10:31,  2.32it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.57e+6]Epoch 1537/3000:  51%|█████     | 1536/3000 [14:32<10:31,  2.32it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.57e+6]Epoch 1537/3000:  51%|█████     | 1537/3000 [14:32<10:55,  2.23it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.57e+6]Epoch 1537/3000:  51%|█████     | 1537/3000 [14:32<10:55,  2.23it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1538/3000:  51%|█████     | 1537/3000 [14:32<10:55,  2.23it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1538/3000:  51%|█████▏    | 1538/3000 [14:33<10:45,  2.27it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1538/3000:  51%|█████▏    | 1538/3000 
[14:33<10:45,  2.27it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.57e+6]Epoch 1539/3000:  51%|█████▏    | 1538/3000 [14:33<10:45,  2.27it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.57e+6]Epoch 1539/3000:  51%|█████▏    | 1539/3000 [14:33<10:57,  2.22it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.57e+6]Epoch 1539/3000:  51%|█████▏    | 1539/3000 [14:33<10:57,  2.22it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.57e+6] Epoch 1540/3000:  51%|█████▏    | 1539/3000 [14:33<10:57,  2.22it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.57e+6]Epoch 1540/3000:  51%|█████▏    | 1540/3000 [14:34<11:09,  2.18it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.57e+6]Epoch 1540/3000:  51%|█████▏    | 1540/3000 [14:34<11:09,  2.18it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.57e+6]Epoch 1541/3000:  51%|█████▏    | 1540/3000 [14:34<11:09,  2.18it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.57e+6]Epoch 1541/3000:  51%|█████▏    | 1541/3000 [14:34<11:17,  2.15it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.57e+6]Epoch 1541/3000:  51%|█████▏    | 1541/3000 [14:34<11:17,  2.15it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.57e+6]Epoch 1542/3000:  51%|█████▏    | 1541/3000 [14:34<11:17,  2.15it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.57e+6]Epoch 1542/3000:  51%|█████▏    | 1542/3000 [14:35<11:12,  2.17it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.57e+6]Epoch 1542/3000:  51%|█████▏    | 1542/3000 [14:35<11:12,  2.17it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.57e+6]Epoch 1543/3000:  51%|█████▏    | 1542/3000 [14:35<11:12,  2.17it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.57e+6]Epoch 1543/3000:  51%|█████▏    | 1543/3000 [14:35<11:09,  2.18it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.57e+6]Epoch 1543/3000:  51%|█████▏    | 1543/3000 [14:35<11:09,  2.18it/s, v_num=1, train_loss_step=1.51e+6, 
train_loss_epoch=1.57e+6]Epoch 1544/3000:  51%|█████▏    | 1543/3000 [14:35<11:09,  2.18it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.57e+6]Epoch 1544/3000:  51%|█████▏    | 1544/3000 [14:35<10:31,  2.30it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.57e+6]Epoch 1544/3000:  51%|█████▏    | 1544/3000 [14:35<10:31,  2.30it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.57e+6]Epoch 1545/3000:  51%|█████▏    | 1544/3000 [14:35<10:31,  2.30it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.57e+6]Epoch 1545/3000:  52%|█████▏    | 1545/3000 [14:36<10:31,  2.30it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.57e+6]Epoch 1545/3000:  52%|█████▏    | 1545/3000 [14:36<10:31,  2.30it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.57e+6] Epoch 1546/3000:  52%|█████▏    | 1545/3000 [14:36<10:31,  2.30it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.57e+6]Epoch 1546/3000:  52%|█████▏    | 1546/3000 [14:36<10:19,  2.35it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.57e+6]Epoch 1546/3000:  52%|█████▏    | 1546/3000 [14:36<10:19,  2.35it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.57e+6]Epoch 1547/3000:  52%|█████▏    | 1546/3000 [14:36<10:19,  2.35it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.57e+6]Epoch 1547/3000:  52%|█████▏    | 1547/3000 [14:37<10:29,  2.31it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.57e+6]Epoch 1547/3000:  52%|█████▏    | 1547/3000 [14:37<10:29,  2.31it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1548/3000:  52%|█████▏    | 1547/3000 [14:37<10:29,  2.31it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1548/3000:  52%|█████▏    | 1548/3000 [14:37<10:51,  2.23it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1548/3000:  52%|█████▏    | 1548/3000 [14:37<10:51,  2.23it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.57e+6] Epoch 1549/3000:  52%|█████▏    | 1548/3000 
[14:37<10:51,  2.23it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.57e+6]Epoch 1549/3000:  52%|█████▏    | 1549/3000 [14:38<11:08,  2.17it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.57e+6]Epoch 1549/3000:  52%|█████▏    | 1549/3000 [14:38<11:08,  2.17it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1550/3000:  52%|█████▏    | 1549/3000 [14:38<11:08,  2.17it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1550/3000:  52%|█████▏    | 1550/3000 [14:38<11:04,  2.18it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1550/3000:  52%|█████▏    | 1550/3000 [14:38<11:04,  2.18it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.57e+6]Epoch 1551/3000:  52%|█████▏    | 1550/3000 [14:38<11:04,  2.18it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.57e+6]Epoch 1551/3000:  52%|█████▏    | 1551/3000 [14:39<11:02,  2.19it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.57e+6]Epoch 1551/3000:  52%|█████▏    | 1551/3000 [14:39<11:02,  2.19it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.57e+6]Epoch 1552/3000:  52%|█████▏    | 1551/3000 [14:39<11:02,  2.19it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.57e+6]Epoch 1552/3000:  52%|█████▏    | 1552/3000 [14:39<09:44,  2.48it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.57e+6]Epoch 1552/3000:  52%|█████▏    | 1552/3000 [14:39<09:44,  2.48it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1553/3000:  52%|█████▏    | 1552/3000 [14:39<09:44,  2.48it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1553/3000:  52%|█████▏    | 1553/3000 [14:39<08:48,  2.74it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1553/3000:  52%|█████▏    | 1553/3000 [14:39<08:48,  2.74it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1554/3000:  52%|█████▏    | 1553/3000 [14:39<08:48,  2.74it/s, v_num=1, train_loss_step=1.56e+6, 
train_loss_epoch=1.57e+6]Epoch 1554/3000:  52%|█████▏    | 1554/3000 [14:40<09:08,  2.63it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1554/3000:  52%|█████▏    | 1554/3000 [14:40<09:08,  2.63it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1555/3000:  52%|█████▏    | 1554/3000 [14:40<09:08,  2.63it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1555/3000:  52%|█████▏    | 1555/3000 [14:40<09:44,  2.47it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1555/3000:  52%|█████▏    | 1555/3000 [14:40<09:44,  2.47it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.57e+6]Epoch 1556/3000:  52%|█████▏    | 1555/3000 [14:40<09:44,  2.47it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.57e+6]Epoch 1556/3000:  52%|█████▏    | 1556/3000 [14:40<10:00,  2.41it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.57e+6]Epoch 1556/3000:  52%|█████▏    | 1556/3000 [14:41<10:00,  2.41it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.57e+6]Epoch 1557/3000:  52%|█████▏    | 1556/3000 [14:41<10:00,  2.41it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.57e+6]Epoch 1557/3000:  52%|█████▏    | 1557/3000 [14:41<09:46,  2.46it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.57e+6]Epoch 1557/3000:  52%|█████▏    | 1557/3000 [14:41<09:46,  2.46it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1558/3000:  52%|█████▏    | 1557/3000 [14:41<09:46,  2.46it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1558/3000:  52%|█████▏    | 1558/3000 [14:41<10:18,  2.33it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1558/3000:  52%|█████▏    | 1558/3000 [14:41<10:18,  2.33it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.57e+6]Epoch 1559/3000:  52%|█████▏    | 1558/3000 [14:41<10:18,  2.33it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.57e+6]Epoch 1559/3000:  52%|█████▏    | 1559/3000 
[14:42<10:12,  2.35it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.57e+6]Epoch 1559/3000:  52%|█████▏    | 1559/3000 [14:42<10:12,  2.35it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.57e+6]Epoch 1560/3000:  52%|█████▏    | 1559/3000 [14:42<10:12,  2.35it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.57e+6]Epoch 1560/3000:  52%|█████▏    | 1560/3000 [14:42<09:59,  2.40it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.57e+6]Epoch 1560/3000:  52%|█████▏    | 1560/3000 [14:42<09:59,  2.40it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.57e+6]Epoch 1561/3000:  52%|█████▏    | 1560/3000 [14:42<09:59,  2.40it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.57e+6]Epoch 1561/3000:  52%|█████▏    | 1561/3000 [14:43<10:54,  2.20it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.57e+6]Epoch 1561/3000:  52%|█████▏    | 1561/3000 [14:43<10:54,  2.20it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.57e+6]Epoch 1562/3000:  52%|█████▏    | 1561/3000 [14:43<10:54,  2.20it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.57e+6]Epoch 1562/3000:  52%|█████▏    | 1562/3000 [14:43<10:15,  2.34it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.57e+6]Epoch 1562/3000:  52%|█████▏    | 1562/3000 [14:43<10:15,  2.34it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.57e+6]Epoch 1563/3000:  52%|█████▏    | 1562/3000 [14:43<10:15,  2.34it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.57e+6]Epoch 1563/3000:  52%|█████▏    | 1563/3000 [14:44<10:12,  2.35it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.57e+6]Epoch 1563/3000:  52%|█████▏    | 1563/3000 [14:44<10:12,  2.35it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.57e+6]Epoch 1564/3000:  52%|█████▏    | 1563/3000 [14:44<10:12,  2.35it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.57e+6]Epoch 1564/3000:  52%|█████▏    | 1564/3000 [14:44<10:14,  2.34it/s, v_num=1, train_loss_step=1.53e+6, 
train_loss_epoch=1.57e+6]Epoch 1564/3000:  52%|█████▏    | 1564/3000 [14:44<10:14,  2.34it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.57e+6] Epoch 1565/3000:  52%|█████▏    | 1564/3000 [14:44<10:14,  2.34it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.57e+6]Epoch 1565/3000:  52%|█████▏    | 1565/3000 [14:44<10:09,  2.36it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.57e+6]Epoch 1565/3000:  52%|█████▏    | 1565/3000 [14:44<10:09,  2.36it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1566/3000:  52%|█████▏    | 1565/3000 [14:44<10:09,  2.36it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1566/3000:  52%|█████▏    | 1566/3000 [14:45<10:26,  2.29it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.57e+6]Epoch 1566/3000:  52%|█████▏    | 1566/3000 [14:45<10:26,  2.29it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.57e+6]Epoch 1567/3000:  52%|█████▏    | 1566/3000 [14:45<10:26,  2.29it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.57e+6]Epoch 1567/3000:  52%|█████▏    | 1567/3000 [14:45<10:19,  2.31it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.57e+6]Epoch 1567/3000:  52%|█████▏    | 1567/3000 [14:45<10:19,  2.31it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.56e+6]Epoch 1568/3000:  52%|█████▏    | 1567/3000 [14:45<10:19,  2.31it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.56e+6]Epoch 1568/3000:  52%|█████▏    | 1568/3000 [14:46<09:51,  2.42it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.56e+6]Epoch 1568/3000:  52%|█████▏    | 1568/3000 [14:46<09:51,  2.42it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.56e+6]Epoch 1569/3000:  52%|█████▏    | 1568/3000 [14:46<09:51,  2.42it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.56e+6]Epoch 1569/3000:  52%|█████▏    | 1569/3000 [14:46<09:40,  2.47it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.56e+6]Epoch 1569/3000:  52%|█████▏    | 1569/3000 
[14:46<09:40,  2.47it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.56e+6]Epoch 1570/3000:  52%|█████▏    | 1569/3000 [14:46<09:40,  2.47it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.56e+6]Epoch 1570/3000:  52%|█████▏    | 1570/3000 [14:46<09:30,  2.51it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.56e+6]Epoch 1570/3000:  52%|█████▏    | 1570/3000 [14:46<09:30,  2.51it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.56e+6] Epoch 1571/3000:  52%|█████▏    | 1570/3000 [14:46<09:30,  2.51it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.56e+6]Epoch 1571/3000:  52%|█████▏    | 1571/3000 [14:47<09:48,  2.43it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.56e+6]Epoch 1571/3000:  52%|█████▏    | 1571/3000 [14:47<09:48,  2.43it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.56e+6]Epoch 1572/3000:  52%|█████▏    | 1571/3000 [14:47<09:48,  2.43it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.56e+6]Epoch 1572/3000:  52%|█████▏    | 1572/3000 [14:47<10:17,  2.31it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.56e+6]Epoch 1572/3000:  52%|█████▏    | 1572/3000 [14:47<10:17,  2.31it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.56e+6]Epoch 1573/3000:  52%|█████▏    | 1572/3000 [14:47<10:17,  2.31it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.56e+6]Epoch 1573/3000:  52%|█████▏    | 1573/3000 [14:48<10:55,  2.18it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.56e+6]Epoch 1573/3000:  52%|█████▏    | 1573/3000 [14:48<10:55,  2.18it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.56e+6]Epoch 1574/3000:  52%|█████▏    | 1573/3000 [14:48<10:55,  2.18it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.56e+6]Epoch 1574/3000:  52%|█████▏    | 1574/3000 [14:48<10:40,  2.23it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.56e+6]Epoch 1574/3000:  52%|█████▏    | 1574/3000 [14:48<10:40,  2.23it/s, v_num=1, train_loss_step=1.54e+6, 
train_loss_epoch=1.56e+6]Epoch 1575/3000:  52%|█████▏    | 1574/3000 [14:48<10:40,  2.23it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.56e+6]Epoch 1575/3000:  52%|█████▎    | 1575/3000 [14:49<10:41,  2.22it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.56e+6]Epoch 1575/3000:  52%|█████▎    | 1575/3000 [14:49<10:41,  2.22it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.56e+6]Epoch 1576/3000:  52%|█████▎    | 1575/3000 [14:49<10:41,  2.22it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.56e+6]Epoch 1576/3000:  53%|█████▎    | 1576/3000 [14:49<11:02,  2.15it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.56e+6]Epoch 1576/3000:  53%|█████▎    | 1576/3000 [14:49<11:02,  2.15it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.56e+6]Epoch 1577/3000:  53%|█████▎    | 1576/3000 [14:49<11:02,  2.15it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.56e+6]Epoch 1577/3000:  53%|█████▎    | 1577/3000 [14:50<10:50,  2.19it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.56e+6]Epoch 1577/3000:  53%|█████▎    | 1577/3000 [14:50<10:50,  2.19it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.56e+6]Epoch 1578/3000:  53%|█████▎    | 1577/3000 [14:50<10:50,  2.19it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.56e+6]Epoch 1578/3000:  53%|█████▎    | 1578/3000 [14:50<10:42,  2.21it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.56e+6]Epoch 1578/3000:  53%|█████▎    | 1578/3000 [14:50<10:42,  2.21it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.56e+6]Epoch 1579/3000:  53%|█████▎    | 1578/3000 [14:50<10:42,  2.21it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.56e+6]Epoch 1579/3000:  53%|█████▎    | 1579/3000 [14:51<10:44,  2.21it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.56e+6]Epoch 1579/3000:  53%|█████▎    | 1579/3000 [14:51<10:44,  2.21it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.56e+6]Epoch 1580/3000:  53%|█████▎    | 1579/3000 
[14:51<10:44,  2.21it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.56e+6]Epoch 1580/3000:  53%|█████▎    | 1580/3000 [14:51<10:45,  2.20it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.56e+6]Epoch 1580/3000:  53%|█████▎    | 1580/3000 [14:51<10:45,  2.20it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.56e+6] Epoch 1581/3000:  53%|█████▎    | 1580/3000 [14:51<10:45,  2.20it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.56e+6]Epoch 1581/3000:  53%|█████▎    | 1581/3000 [14:51<10:27,  2.26it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.56e+6]Epoch 1581/3000:  53%|█████▎    | 1581/3000 [14:51<10:27,  2.26it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.56e+6]Epoch 1582/3000:  53%|█████▎    | 1581/3000 [14:51<10:27,  2.26it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.56e+6]Epoch 1582/3000:  53%|█████▎    | 1582/3000 [14:52<10:14,  2.31it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.56e+6]Epoch 1582/3000:  53%|█████▎    | 1582/3000 [14:52<10:14,  2.31it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.56e+6]Epoch 1583/3000:  53%|█████▎    | 1582/3000 [14:52<10:14,  2.31it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.56e+6]Epoch 1583/3000:  53%|█████▎    | 1583/3000 [14:52<10:23,  2.27it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.56e+6]Epoch 1583/3000:  53%|█████▎    | 1583/3000 [14:52<10:23,  2.27it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.56e+6]Epoch 1584/3000:  53%|█████▎    | 1583/3000 [14:52<10:23,  2.27it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.56e+6]Epoch 1584/3000:  53%|█████▎    | 1584/3000 [14:53<10:08,  2.33it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.56e+6]Epoch 1584/3000:  53%|█████▎    | 1584/3000 [14:53<10:08,  2.33it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.56e+6] Epoch 1585/3000:  53%|█████▎    | 1584/3000 [14:53<10:08,  2.33it/s, v_num=1, train_loss_step=1.6e+6, 
train_loss_epoch=1.56e+6]Epoch 1585/3000:  53%|█████▎    | 1585/3000 [14:53<10:35,  2.23it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.56e+6]Epoch 1585/3000:  53%|█████▎    | 1585/3000 [14:53<10:35,  2.23it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.56e+6]Epoch 1586/3000:  53%|█████▎    | 1585/3000 [14:53<10:35,  2.23it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.56e+6]Epoch 1586/3000:  53%|█████▎    | 1586/3000 [14:54<10:40,  2.21it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.56e+6]Epoch 1586/3000:  53%|█████▎    | 1586/3000 [14:54<10:40,  2.21it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.56e+6]Epoch 1587/3000:  53%|█████▎    | 1586/3000 [14:54<10:40,  2.21it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.56e+6]Epoch 1587/3000:  53%|█████▎    | 1587/3000 [14:54<10:53,  2.16it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.56e+6]Epoch 1587/3000:  53%|█████▎    | 1587/3000 [14:54<10:53,  2.16it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.56e+6]Epoch 1588/3000:  53%|█████▎    | 1587/3000 [14:54<10:53,  2.16it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.56e+6]Epoch 1588/3000:  53%|█████▎    | 1588/3000 [14:55<10:28,  2.25it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.56e+6]Epoch 1588/3000:  53%|█████▎    | 1588/3000 [14:55<10:28,  2.25it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.56e+6]Epoch 1589/3000:  53%|█████▎    | 1588/3000 [14:55<10:28,  2.25it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.56e+6]Epoch 1589/3000:  53%|█████▎    | 1589/3000 [14:55<10:41,  2.20it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.56e+6]Epoch 1589/3000:  53%|█████▎    | 1589/3000 [14:55<10:41,  2.20it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.56e+6]Epoch 1590/3000:  53%|█████▎    | 1589/3000 [14:55<10:41,  2.20it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.56e+6]Epoch 1590/3000:  53%|█████▎    | 1590/3000 
[14:55<10:29,  2.24it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.56e+6]Epoch 1590/3000:  53%|█████▎    | 1590/3000 [14:55<10:29,  2.24it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.56e+6]Epoch 1591/3000:  53%|█████▎    | 1590/3000 [14:55<10:29,  2.24it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.56e+6]Epoch 1591/3000:  53%|█████▎    | 1591/3000 [14:56<10:44,  2.19it/s, v_num=1, train_loss_step=1.63e+6, train_loss_epoch=1.56e+6]Epoch 1591/3000:  53%|█████▎    | 1591/3000 [14:56<10:44,  2.19it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.56e+6]Epoch 1592/3000:  53%|█████▎    | 1591/3000 [14:56<10:44,  2.19it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.56e+6]Epoch 1592/3000:  53%|█████▎    | 1592/3000 [14:56<10:51,  2.16it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.56e+6]Epoch 1592/3000:  53%|█████▎    | 1592/3000 [14:56<10:51,  2.16it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.56e+6]Epoch 1593/3000:  53%|█████▎    | 1592/3000 [14:56<10:51,  2.16it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.56e+6]Epoch 1593/3000:  53%|█████▎    | 1593/3000 [14:57<10:16,  2.28it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.56e+6]Epoch 1593/3000:  53%|█████▎    | 1593/3000 [14:57<10:16,  2.28it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.56e+6]Epoch 1594/3000:  53%|█████▎    | 1593/3000 [14:57<10:16,  2.28it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.56e+6]Epoch 1594/3000:  53%|█████▎    | 1594/3000 [14:57<09:43,  2.41it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.56e+6]Epoch 1594/3000:  53%|█████▎    | 1594/3000 [14:57<09:43,  2.41it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.56e+6]Epoch 1595/3000:  53%|█████▎    | 1594/3000 [14:57<09:43,  2.41it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.56e+6]Epoch 1595/3000:  53%|█████▎    | 1595/3000 [14:58<10:08,  2.31it/s, v_num=1, train_loss_step=1.57e+6, 
train_loss_epoch=1.56e+6]Epoch 1595/3000:  53%|█████▎    | 1595/3000 [14:58<10:08,  2.31it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.56e+6]Epoch 1596/3000:  53%|█████▎    | 1595/3000 [14:58<10:08,  2.31it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.56e+6]Epoch 1596/3000:  53%|█████▎    | 1596/3000 [14:58<10:18,  2.27it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.56e+6]Epoch 1596/3000:  53%|█████▎    | 1596/3000 [14:58<10:18,  2.27it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.56e+6]Epoch 1597/3000:  53%|█████▎    | 1596/3000 [14:58<10:18,  2.27it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.56e+6]Epoch 1597/3000:  53%|█████▎    | 1597/3000 [14:59<10:26,  2.24it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.56e+6]Epoch 1597/3000:  53%|█████▎    | 1597/3000 [14:59<10:26,  2.24it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.56e+6]Epoch 1598/3000:  53%|█████▎    | 1597/3000 [14:59<10:26,  2.24it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.56e+6]Epoch 1598/3000:  53%|█████▎    | 1598/3000 [14:59<10:14,  2.28it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.56e+6]Epoch 1598/3000:  53%|█████▎    | 1598/3000 [14:59<10:14,  2.28it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.56e+6]Epoch 1599/3000:  53%|█████▎    | 1598/3000 [14:59<10:14,  2.28it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.56e+6]Epoch 1599/3000:  53%|█████▎    | 1599/3000 [15:00<11:05,  2.11it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.56e+6]Epoch 1599/3000:  53%|█████▎    | 1599/3000 [15:00<11:05,  2.11it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.56e+6]Epoch 1600/3000:  53%|█████▎    | 1599/3000 [15:00<11:05,  2.11it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.56e+6]Epoch 1600/3000:  53%|█████▎    | 1600/3000 [15:00<11:09,  2.09it/s, v_num=1, train_loss_step=1.61e+6, train_loss_epoch=1.56e+6]Epoch 1600/3000:  53%|█████▎    | 1600/3000 
[15:00<11:09,  2.09it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.56e+6]Epoch 1601/3000:  53%|█████▎    | 1600/3000 [15:00<11:09,  2.09it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.56e+6]Epoch 1601/3000:  53%|█████▎    | 1601/3000 [15:00<10:34,  2.21it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.56e+6]Epoch 1601/3000:  53%|█████▎    | 1601/3000 [15:00<10:34,  2.21it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.56e+6]Epoch 1602/3000:  53%|█████▎    | 1601/3000 [15:00<10:34,  2.21it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.56e+6]Epoch 1602/3000:  53%|█████▎    | 1602/3000 [15:01<10:23,  2.24it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.56e+6]Epoch 1602/3000:  53%|█████▎    | 1602/3000 [15:01<10:23,  2.24it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.56e+6] Epoch 1603/3000:  53%|█████▎    | 1602/3000 [15:01<10:23,  2.24it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.56e+6]Epoch 1603/3000:  53%|█████▎    | 1603/3000 [15:01<09:52,  2.36it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.56e+6]Epoch 1603/3000:  53%|█████▎    | 1603/3000 [15:01<09:52,  2.36it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.56e+6]Epoch 1604/3000:  53%|█████▎    | 1603/3000 [15:01<09:52,  2.36it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.56e+6]Epoch 1604/3000:  53%|█████▎    | 1604/3000 [15:02<10:02,  2.32it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.56e+6]Epoch 1604/3000:  53%|█████▎    | 1604/3000 [15:02<10:02,  2.32it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.56e+6]Epoch 1605/3000:  53%|█████▎    | 1604/3000 [15:02<10:02,  2.32it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.56e+6]Epoch 1605/3000:  54%|█████▎    | 1605/3000 [15:02<10:06,  2.30it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.56e+6]Epoch 1605/3000:  54%|█████▎    | 1605/3000 [15:02<10:06,  2.30it/s, v_num=1, train_loss_step=1.59e+6, 
train_loss_epoch=1.56e+6]Epoch 1606/3000:  54%|█████▎    | 1605/3000 [15:02<10:06,  2.30it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.56e+6]Epoch 1606/3000:  54%|█████▎    | 1606/3000 [15:02<09:55,  2.34it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.56e+6]Epoch 1606/3000:  54%|█████▎    | 1606/3000 [15:02<09:55,  2.34it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.56e+6]Epoch 1607/3000:  54%|█████▎    | 1606/3000 [15:03<09:55,  2.34it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.56e+6]Epoch 1607/3000:  54%|█████▎    | 1607/3000 [15:03<10:15,  2.26it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.56e+6]Epoch 1607/3000:  54%|█████▎    | 1607/3000 [15:03<10:15,  2.26it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.56e+6]Epoch 1608/3000:  54%|█████▎    | 1607/3000 [15:03<10:15,  2.26it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.56e+6]Epoch 1608/3000:  54%|█████▎    | 1608/3000 [15:03<10:17,  2.26it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.56e+6]Epoch 1608/3000:  54%|█████▎    | 1608/3000 [15:03<10:17,  2.26it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1609/3000:  54%|█████▎    | 1608/3000 [15:03<10:17,  2.26it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1609/3000:  54%|█████▎    | 1609/3000 [15:04<10:25,  2.22it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1609/3000:  54%|█████▎    | 1609/3000 [15:04<10:25,  2.22it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1610/3000:  54%|█████▎    | 1609/3000 [15:04<10:25,  2.22it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1610/3000:  54%|█████▎    | 1610/3000 [15:04<09:39,  2.40it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1610/3000:  54%|█████▎    | 1610/3000 [15:04<09:39,  2.40it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1611/3000:  54%|█████▎    | 1610/3000 
[15:04<09:39,  2.40it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1611/3000:  54%|█████▎    | 1611/3000 [15:05<09:35,  2.41it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1611/3000:  54%|█████▎    | 1611/3000 [15:05<09:35,  2.41it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.55e+6]Epoch 1612/3000:  54%|█████▎    | 1611/3000 [15:05<09:35,  2.41it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.55e+6]Epoch 1612/3000:  54%|█████▎    | 1612/3000 [15:05<09:48,  2.36it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.55e+6]Epoch 1612/3000:  54%|█████▎    | 1612/3000 [15:05<09:48,  2.36it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1613/3000:  54%|█████▎    | 1612/3000 [15:05<09:48,  2.36it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1613/3000:  54%|█████▍    | 1613/3000 [15:06<09:51,  2.34it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1613/3000:  54%|█████▍    | 1613/3000 [15:06<09:51,  2.34it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1614/3000:  54%|█████▍    | 1613/3000 [15:06<09:51,  2.34it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1614/3000:  54%|█████▍    | 1614/3000 [15:06<10:14,  2.26it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1614/3000:  54%|█████▍    | 1614/3000 [15:06<10:14,  2.26it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1615/3000:  54%|█████▍    | 1614/3000 [15:06<10:14,  2.26it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1615/3000:  54%|█████▍    | 1615/3000 [15:06<10:28,  2.21it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1615/3000:  54%|█████▍    | 1615/3000 [15:06<10:28,  2.21it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.55e+6]Epoch 1616/3000:  54%|█████▍    | 1615/3000 [15:06<10:28,  2.21it/s, v_num=1, train_loss_step=1.59e+6, 
train_loss_epoch=1.55e+6]Epoch 1616/3000:  54%|█████▍    | 1616/3000 [15:07<10:34,  2.18it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.55e+6]Epoch 1616/3000:  54%|█████▍    | 1616/3000 [15:07<10:34,  2.18it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1617/3000:  54%|█████▍    | 1616/3000 [15:07<10:34,  2.18it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1617/3000:  54%|█████▍    | 1617/3000 [15:07<10:54,  2.11it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1617/3000:  54%|█████▍    | 1617/3000 [15:07<10:54,  2.11it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1618/3000:  54%|█████▍    | 1617/3000 [15:07<10:54,  2.11it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1618/3000:  54%|█████▍    | 1618/3000 [15:08<10:57,  2.10it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1618/3000:  54%|█████▍    | 1618/3000 [15:08<10:57,  2.10it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.55e+6]Epoch 1619/3000:  54%|█████▍    | 1618/3000 [15:08<10:57,  2.10it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.55e+6]Epoch 1619/3000:  54%|█████▍    | 1619/3000 [15:08<10:16,  2.24it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.55e+6]Epoch 1619/3000:  54%|█████▍    | 1619/3000 [15:08<10:16,  2.24it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1620/3000:  54%|█████▍    | 1619/3000 [15:08<10:16,  2.24it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1620/3000:  54%|█████▍    | 1620/3000 [15:09<10:31,  2.19it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1620/3000:  54%|█████▍    | 1620/3000 [15:09<10:31,  2.19it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.55e+6]Epoch 1621/3000:  54%|█████▍    | 1620/3000 [15:09<10:31,  2.19it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.55e+6]Epoch 1621/3000:  54%|█████▍    | 1621/3000 
[15:09<10:42,  2.15it/s, v_num=1, train_loss_step=1.62e+6, train_loss_epoch=1.55e+6]Epoch 1621/3000:  54%|█████▍    | 1621/3000 [15:09<10:42,  2.15it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1622/3000:  54%|█████▍    | 1621/3000 [15:09<10:42,  2.15it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1622/3000:  54%|█████▍    | 1622/3000 [15:10<10:16,  2.23it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1622/3000:  54%|█████▍    | 1622/3000 [15:10<10:16,  2.23it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1623/3000:  54%|█████▍    | 1622/3000 [15:10<10:16,  2.23it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1623/3000:  54%|█████▍    | 1623/3000 [15:10<10:27,  2.19it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1623/3000:  54%|█████▍    | 1623/3000 [15:10<10:27,  2.19it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.55e+6]Epoch 1624/3000:  54%|█████▍    | 1623/3000 [15:10<10:27,  2.19it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.55e+6]Epoch 1624/3000:  54%|█████▍    | 1624/3000 [15:11<09:57,  2.30it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.55e+6]Epoch 1624/3000:  54%|█████▍    | 1624/3000 [15:11<09:57,  2.30it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.55e+6]Epoch 1625/3000:  54%|█████▍    | 1624/3000 [15:11<09:57,  2.30it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.55e+6]Epoch 1625/3000:  54%|█████▍    | 1625/3000 [15:11<10:01,  2.29it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.55e+6]Epoch 1625/3000:  54%|█████▍    | 1625/3000 [15:11<10:01,  2.29it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.55e+6]Epoch 1626/3000:  54%|█████▍    | 1625/3000 [15:11<10:01,  2.29it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.55e+6]Epoch 1626/3000:  54%|█████▍    | 1626/3000 [15:11<10:22,  2.21it/s, v_num=1, train_loss_step=1.55e+6, 
train_loss_epoch=1.55e+6]Epoch 1626/3000:  54%|█████▍    | 1626/3000 [15:11<10:22,  2.21it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.55e+6]Epoch 1627/3000:  54%|█████▍    | 1626/3000 [15:11<10:22,  2.21it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.55e+6]Epoch 1627/3000:  54%|█████▍    | 1627/3000 [15:12<10:40,  2.14it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.55e+6]Epoch 1627/3000:  54%|█████▍    | 1627/3000 [15:12<10:40,  2.14it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1628/3000:  54%|█████▍    | 1627/3000 [15:12<10:40,  2.14it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1628/3000:  54%|█████▍    | 1628/3000 [15:12<10:07,  2.26it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1628/3000:  54%|█████▍    | 1628/3000 [15:12<10:07,  2.26it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1629/3000:  54%|█████▍    | 1628/3000 [15:12<10:07,  2.26it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1629/3000:  54%|█████▍    | 1629/3000 [15:13<09:57,  2.29it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1629/3000:  54%|█████▍    | 1629/3000 [15:13<09:57,  2.29it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.55e+6]Epoch 1630/3000:  54%|█████▍    | 1629/3000 [15:13<09:57,  2.29it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.55e+6]Epoch 1630/3000:  54%|█████▍    | 1630/3000 [15:13<10:04,  2.27it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.55e+6]Epoch 1630/3000:  54%|█████▍    | 1630/3000 [15:13<10:04,  2.27it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.55e+6]Epoch 1631/3000:  54%|█████▍    | 1630/3000 [15:13<10:04,  2.27it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.55e+6]Epoch 1631/3000:  54%|█████▍    | 1631/3000 [15:14<10:06,  2.26it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.55e+6]Epoch 1631/3000:  54%|█████▍    | 1631/3000 
[15:14<10:06,  2.26it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.55e+6]Epoch 1632/3000:  54%|█████▍    | 1631/3000 [15:14<10:06,  2.26it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.55e+6]Epoch 1632/3000:  54%|█████▍    | 1632/3000 [15:14<10:07,  2.25it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.55e+6]Epoch 1632/3000:  54%|█████▍    | 1632/3000 [15:14<10:07,  2.25it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.55e+6]Epoch 1633/3000:  54%|█████▍    | 1632/3000 [15:14<10:07,  2.25it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.55e+6]Epoch 1633/3000:  54%|█████▍    | 1633/3000 [15:15<10:05,  2.26it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.55e+6]Epoch 1633/3000:  54%|█████▍    | 1633/3000 [15:15<10:05,  2.26it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.55e+6]Epoch 1634/3000:  54%|█████▍    | 1633/3000 [15:15<10:05,  2.26it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.55e+6]Epoch 1634/3000:  54%|█████▍    | 1634/3000 [15:15<09:48,  2.32it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.55e+6]Epoch 1634/3000:  54%|█████▍    | 1634/3000 [15:15<09:48,  2.32it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.55e+6]Epoch 1635/3000:  54%|█████▍    | 1634/3000 [15:15<09:48,  2.32it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.55e+6]Epoch 1635/3000:  55%|█████▍    | 1635/3000 [15:15<10:03,  2.26it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.55e+6]Epoch 1635/3000:  55%|█████▍    | 1635/3000 [15:15<10:03,  2.26it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1636/3000:  55%|█████▍    | 1635/3000 [15:15<10:03,  2.26it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1636/3000:  55%|█████▍    | 1636/3000 [15:16<10:22,  2.19it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1636/3000:  55%|█████▍    | 1636/3000 [15:16<10:22,  2.19it/s, v_num=1, train_loss_step=1.64e+6, 
train_loss_epoch=1.55e+6]Epoch 1637/3000:  55%|█████▍    | 1636/3000 [15:16<10:22,  2.19it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.55e+6]Epoch 1637/3000:  55%|█████▍    | 1637/3000 [15:16<10:34,  2.15it/s, v_num=1, train_loss_step=1.64e+6, train_loss_epoch=1.55e+6]Epoch 1637/3000:  55%|█████▍    | 1637/3000 [15:16<10:34,  2.15it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.55e+6]Epoch 1638/3000:  55%|█████▍    | 1637/3000 [15:16<10:34,  2.15it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.55e+6]Epoch 1638/3000:  55%|█████▍    | 1638/3000 [15:17<10:32,  2.15it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.55e+6]Epoch 1638/3000:  55%|█████▍    | 1638/3000 [15:17<10:32,  2.15it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.55e+6]Epoch 1639/3000:  55%|█████▍    | 1638/3000 [15:17<10:32,  2.15it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.55e+6]Epoch 1639/3000:  55%|█████▍    | 1639/3000 [15:17<10:46,  2.11it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.55e+6]Epoch 1639/3000:  55%|█████▍    | 1639/3000 [15:17<10:46,  2.11it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.55e+6]Epoch 1640/3000:  55%|█████▍    | 1639/3000 [15:17<10:46,  2.11it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.55e+6]Epoch 1640/3000:  55%|█████▍    | 1640/3000 [15:18<11:32,  1.96it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.55e+6]Epoch 1640/3000:  55%|█████▍    | 1640/3000 [15:18<11:32,  1.96it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.55e+6]Epoch 1641/3000:  55%|█████▍    | 1640/3000 [15:18<11:32,  1.96it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.55e+6]Epoch 1641/3000:  55%|█████▍    | 1641/3000 [15:18<11:32,  1.96it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.55e+6]Epoch 1641/3000:  55%|█████▍    | 1641/3000 [15:18<11:32,  1.96it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1642/3000:  55%|█████▍    | 1641/3000 
[15:18<11:32,  1.96it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1642/3000:  55%|█████▍    | 1642/3000 [15:19<10:17,  2.20it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.55e+6]Epoch 1642/3000:  55%|█████▍    | 1642/3000 [15:19<10:17,  2.20it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1643/3000:  55%|█████▍    | 1642/3000 [15:19<10:17,  2.20it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1643/3000:  55%|█████▍    | 1643/3000 [15:19<10:10,  2.22it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1643/3000:  55%|█████▍    | 1643/3000 [15:19<10:10,  2.22it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.55e+6]Epoch 1644/3000:  55%|█████▍    | 1643/3000 [15:19<10:10,  2.22it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.55e+6]Epoch 1644/3000:  55%|█████▍    | 1644/3000 [15:20<10:22,  2.18it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.55e+6]Epoch 1644/3000:  55%|█████▍    | 1644/3000 [15:20<10:22,  2.18it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1645/3000:  55%|█████▍    | 1644/3000 [15:20<10:22,  2.18it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1645/3000:  55%|█████▍    | 1645/3000 [15:20<10:24,  2.17it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1645/3000:  55%|█████▍    | 1645/3000 [15:20<10:24,  2.17it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.55e+6]Epoch 1646/3000:  55%|█████▍    | 1645/3000 [15:20<10:24,  2.17it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.55e+6]Epoch 1646/3000:  55%|█████▍    | 1646/3000 [15:21<10:48,  2.09it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.55e+6]Epoch 1646/3000:  55%|█████▍    | 1646/3000 [15:21<10:48,  2.09it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.55e+6]Epoch 1647/3000:  55%|█████▍    | 1646/3000 [15:21<10:48,  2.09it/s, v_num=1, train_loss_step=1.58e+6, 
train_loss_epoch=1.55e+6]Epoch 1647/3000:  55%|█████▍    | 1647/3000 [15:21<10:46,  2.09it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.55e+6]Epoch 1647/3000:  55%|█████▍    | 1647/3000 [15:21<10:46,  2.09it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.55e+6]Epoch 1648/3000:  55%|█████▍    | 1647/3000 [15:21<10:46,  2.09it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.55e+6]Epoch 1648/3000:  55%|█████▍    | 1648/3000 [15:22<10:29,  2.15it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.55e+6]Epoch 1648/3000:  55%|█████▍    | 1648/3000 [15:22<10:29,  2.15it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.55e+6]Epoch 1649/3000:  55%|█████▍    | 1648/3000 [15:22<10:29,  2.15it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.55e+6]Epoch 1649/3000:  55%|█████▍    | 1649/3000 [15:22<10:29,  2.15it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.55e+6]Epoch 1649/3000:  55%|█████▍    | 1649/3000 [15:22<10:29,  2.15it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.55e+6]Epoch 1650/3000:  55%|█████▍    | 1649/3000 [15:22<10:29,  2.15it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.55e+6]Epoch 1650/3000:  55%|█████▌    | 1650/3000 [15:23<10:52,  2.07it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.55e+6]Epoch 1650/3000:  55%|█████▌    | 1650/3000 [15:23<10:52,  2.07it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.55e+6]Epoch 1651/3000:  55%|█████▌    | 1650/3000 [15:23<10:52,  2.07it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.55e+6]Epoch 1651/3000:  55%|█████▌    | 1651/3000 [15:23<10:00,  2.25it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.55e+6]Epoch 1651/3000:  55%|█████▌    | 1651/3000 [15:23<10:00,  2.25it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1652/3000:  55%|█████▌    | 1651/3000 [15:23<10:00,  2.25it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1652/3000:  55%|█████▌    | 1652/3000 
[15:23<10:00,  2.25it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.55e+6]Epoch 1652/3000:  55%|█████▌    | 1652/3000 [15:23<10:00,  2.25it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.54e+6]Epoch 1653/3000:  55%|█████▌    | 1652/3000 [15:23<10:00,  2.25it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.54e+6]Epoch 1653/3000:  55%|█████▌    | 1653/3000 [15:24<09:50,  2.28it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.54e+6]Epoch 1653/3000:  55%|█████▌    | 1653/3000 [15:24<09:50,  2.28it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1654/3000:  55%|█████▌    | 1653/3000 [15:24<09:50,  2.28it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1654/3000:  55%|█████▌    | 1654/3000 [15:24<09:28,  2.37it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1654/3000:  55%|█████▌    | 1654/3000 [15:24<09:28,  2.37it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1655/3000:  55%|█████▌    | 1654/3000 [15:24<09:28,  2.37it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1655/3000:  55%|█████▌    | 1655/3000 [15:25<09:26,  2.38it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1655/3000:  55%|█████▌    | 1655/3000 [15:25<09:26,  2.38it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1656/3000:  55%|█████▌    | 1655/3000 [15:25<09:26,  2.38it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1656/3000:  55%|█████▌    | 1656/3000 [15:25<09:22,  2.39it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1656/3000:  55%|█████▌    | 1656/3000 [15:25<09:22,  2.39it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.54e+6]Epoch 1657/3000:  55%|█████▌    | 1656/3000 [15:25<09:22,  2.39it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.54e+6]Epoch 1657/3000:  55%|█████▌    | 1657/3000 [15:25<09:08,  2.45it/s, v_num=1, train_loss_step=1.48e+6, 
train_loss_epoch=1.54e+6]Epoch 1657/3000:  55%|█████▌    | 1657/3000 [15:25<09:08,  2.45it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.54e+6]Epoch 1658/3000:  55%|█████▌    | 1657/3000 [15:25<09:08,  2.45it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.54e+6]Epoch 1658/3000:  55%|█████▌    | 1658/3000 [15:26<09:04,  2.46it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.54e+6]Epoch 1658/3000:  55%|█████▌    | 1658/3000 [15:26<09:04,  2.46it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.54e+6]Epoch 1659/3000:  55%|█████▌    | 1658/3000 [15:26<09:04,  2.46it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.54e+6]Epoch 1659/3000:  55%|█████▌    | 1659/3000 [15:26<09:08,  2.45it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.54e+6]Epoch 1659/3000:  55%|█████▌    | 1659/3000 [15:26<09:08,  2.45it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.54e+6]Epoch 1660/3000:  55%|█████▌    | 1659/3000 [15:26<09:08,  2.45it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.54e+6]Epoch 1660/3000:  55%|█████▌    | 1660/3000 [15:27<09:48,  2.28it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.54e+6]Epoch 1660/3000:  55%|█████▌    | 1660/3000 [15:27<09:48,  2.28it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1661/3000:  55%|█████▌    | 1660/3000 [15:27<09:48,  2.28it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1661/3000:  55%|█████▌    | 1661/3000 [15:27<09:33,  2.34it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1661/3000:  55%|█████▌    | 1661/3000 [15:27<09:33,  2.34it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.54e+6] Epoch 1662/3000:  55%|█████▌    | 1661/3000 [15:27<09:33,  2.34it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.54e+6]Epoch 1662/3000:  55%|█████▌    | 1662/3000 [15:28<09:38,  2.31it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.54e+6]Epoch 1662/3000:  55%|█████▌    | 1662/3000 
[15:28<09:38,  2.31it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1663/3000:  55%|█████▌    | 1662/3000 [15:28<09:38,  2.31it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1663/3000:  55%|█████▌    | 1663/3000 [15:28<09:49,  2.27it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1663/3000:  55%|█████▌    | 1663/3000 [15:28<09:49,  2.27it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1664/3000:  55%|█████▌    | 1663/3000 [15:28<09:49,  2.27it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1664/3000:  55%|█████▌    | 1664/3000 [15:28<09:35,  2.32it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1664/3000:  55%|█████▌    | 1664/3000 [15:28<09:35,  2.32it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.54e+6]Epoch 1665/3000:  55%|█████▌    | 1664/3000 [15:28<09:35,  2.32it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.54e+6]Epoch 1665/3000:  56%|█████▌    | 1665/3000 [15:29<09:50,  2.26it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.54e+6]Epoch 1665/3000:  56%|█████▌    | 1665/3000 [15:29<09:50,  2.26it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1666/3000:  56%|█████▌    | 1665/3000 [15:29<09:50,  2.26it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1666/3000:  56%|█████▌    | 1666/3000 [15:29<09:40,  2.30it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1666/3000:  56%|█████▌    | 1666/3000 [15:29<09:40,  2.30it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1667/3000:  56%|█████▌    | 1666/3000 [15:29<09:40,  2.30it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1667/3000:  56%|█████▌    | 1667/3000 [15:30<09:47,  2.27it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1667/3000:  56%|█████▌    | 1667/3000 [15:30<09:47,  2.27it/s, v_num=1, train_loss_step=1.55e+6, 
train_loss_epoch=1.54e+6]Epoch 1668/3000:  56%|█████▌    | 1667/3000 [15:30<09:47,  2.27it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1668/3000:  56%|█████▌    | 1668/3000 [15:30<09:26,  2.35it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1668/3000:  56%|█████▌    | 1668/3000 [15:30<09:26,  2.35it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1669/3000:  56%|█████▌    | 1668/3000 [15:30<09:26,  2.35it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1669/3000:  56%|█████▌    | 1669/3000 [15:31<09:11,  2.41it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1669/3000:  56%|█████▌    | 1669/3000 [15:31<09:11,  2.41it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1670/3000:  56%|█████▌    | 1669/3000 [15:31<09:11,  2.41it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1670/3000:  56%|█████▌    | 1670/3000 [15:31<09:17,  2.38it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1670/3000:  56%|█████▌    | 1670/3000 [15:31<09:17,  2.38it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1671/3000:  56%|█████▌    | 1670/3000 [15:31<09:17,  2.38it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1671/3000:  56%|█████▌    | 1671/3000 [15:31<08:58,  2.47it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1671/3000:  56%|█████▌    | 1671/3000 [15:31<08:58,  2.47it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.54e+6]Epoch 1672/3000:  56%|█████▌    | 1671/3000 [15:31<08:58,  2.47it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.54e+6]Epoch 1672/3000:  56%|█████▌    | 1672/3000 [15:32<09:00,  2.46it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.54e+6]Epoch 1672/3000:  56%|█████▌    | 1672/3000 [15:32<09:00,  2.46it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1673/3000:  56%|█████▌    | 1672/3000 
[15:32<09:00,  2.46it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1673/3000:  56%|█████▌    | 1673/3000 [15:32<09:17,  2.38it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1673/3000:  56%|█████▌    | 1673/3000 [15:32<09:17,  2.38it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.54e+6]Epoch 1674/3000:  56%|█████▌    | 1673/3000 [15:32<09:17,  2.38it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.54e+6]Epoch 1674/3000:  56%|█████▌    | 1674/3000 [15:33<09:26,  2.34it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.54e+6]Epoch 1674/3000:  56%|█████▌    | 1674/3000 [15:33<09:26,  2.34it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1675/3000:  56%|█████▌    | 1674/3000 [15:33<09:26,  2.34it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1675/3000:  56%|█████▌    | 1675/3000 [15:33<08:58,  2.46it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1675/3000:  56%|█████▌    | 1675/3000 [15:33<08:58,  2.46it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1676/3000:  56%|█████▌    | 1675/3000 [15:33<08:58,  2.46it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1676/3000:  56%|█████▌    | 1676/3000 [15:34<09:11,  2.40it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1676/3000:  56%|█████▌    | 1676/3000 [15:34<09:11,  2.40it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1677/3000:  56%|█████▌    | 1676/3000 [15:34<09:11,  2.40it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1677/3000:  56%|█████▌    | 1677/3000 [15:34<09:14,  2.39it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1677/3000:  56%|█████▌    | 1677/3000 [15:34<09:14,  2.39it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1678/3000:  56%|█████▌    | 1677/3000 [15:34<09:14,  2.39it/s, v_num=1, train_loss_step=1.53e+6, 
train_loss_epoch=1.54e+6]Epoch 1678/3000:  56%|█████▌    | 1678/3000 [15:34<09:13,  2.39it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1678/3000:  56%|█████▌    | 1678/3000 [15:34<09:13,  2.39it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.54e+6]Epoch 1679/3000:  56%|█████▌    | 1678/3000 [15:34<09:13,  2.39it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.54e+6]Epoch 1679/3000:  56%|█████▌    | 1679/3000 [15:35<09:32,  2.31it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.54e+6]Epoch 1679/3000:  56%|█████▌    | 1679/3000 [15:35<09:32,  2.31it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1680/3000:  56%|█████▌    | 1679/3000 [15:35<09:32,  2.31it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1680/3000:  56%|█████▌    | 1680/3000 [15:35<09:55,  2.22it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1680/3000:  56%|█████▌    | 1680/3000 [15:35<09:55,  2.22it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.54e+6]Epoch 1681/3000:  56%|█████▌    | 1680/3000 [15:35<09:55,  2.22it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.54e+6]Epoch 1681/3000:  56%|█████▌    | 1681/3000 [15:36<09:58,  2.20it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.54e+6]Epoch 1681/3000:  56%|█████▌    | 1681/3000 [15:36<09:58,  2.20it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.54e+6]Epoch 1682/3000:  56%|█████▌    | 1681/3000 [15:36<09:58,  2.20it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.54e+6]Epoch 1682/3000:  56%|█████▌    | 1682/3000 [15:36<10:09,  2.16it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.54e+6]Epoch 1682/3000:  56%|█████▌    | 1682/3000 [15:36<10:09,  2.16it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.54e+6]Epoch 1683/3000:  56%|█████▌    | 1682/3000 [15:36<10:09,  2.16it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.54e+6]Epoch 1683/3000:  56%|█████▌    | 1683/3000 
[15:37<09:48,  2.24it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.54e+6]Epoch 1683/3000:  56%|█████▌    | 1683/3000 [15:37<09:48,  2.24it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.54e+6]Epoch 1684/3000:  56%|█████▌    | 1683/3000 [15:37<09:48,  2.24it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.54e+6]Epoch 1684/3000:  56%|█████▌    | 1684/3000 [15:37<09:34,  2.29it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.54e+6]Epoch 1684/3000:  56%|█████▌    | 1684/3000 [15:37<09:34,  2.29it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1685/3000:  56%|█████▌    | 1684/3000 [15:37<09:34,  2.29it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1685/3000:  56%|█████▌    | 1685/3000 [15:37<09:15,  2.37it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1685/3000:  56%|█████▌    | 1685/3000 [15:37<09:15,  2.37it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1686/3000:  56%|█████▌    | 1685/3000 [15:37<09:15,  2.37it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1686/3000:  56%|█████▌    | 1686/3000 [15:38<09:53,  2.22it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1686/3000:  56%|█████▌    | 1686/3000 [15:38<09:53,  2.22it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.54e+6]Epoch 1687/3000:  56%|█████▌    | 1686/3000 [15:38<09:53,  2.22it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.54e+6]Epoch 1687/3000:  56%|█████▌    | 1687/3000 [15:38<09:45,  2.24it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.54e+6]Epoch 1687/3000:  56%|█████▌    | 1687/3000 [15:38<09:45,  2.24it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.54e+6]Epoch 1688/3000:  56%|█████▌    | 1687/3000 [15:38<09:45,  2.24it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.54e+6]Epoch 1688/3000:  56%|█████▋    | 1688/3000 [15:39<09:50,  2.22it/s, v_num=1, train_loss_step=1.52e+6, 
train_loss_epoch=1.54e+6]Epoch 1688/3000:  56%|█████▋    | 1688/3000 [15:39<09:50,  2.22it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.54e+6]Epoch 1689/3000:  56%|█████▋    | 1688/3000 [15:39<09:50,  2.22it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.54e+6]Epoch 1689/3000:  56%|█████▋    | 1689/3000 [15:39<09:58,  2.19it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.54e+6]Epoch 1689/3000:  56%|█████▋    | 1689/3000 [15:39<09:58,  2.19it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.54e+6]Epoch 1690/3000:  56%|█████▋    | 1689/3000 [15:39<09:58,  2.19it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.54e+6]Epoch 1690/3000:  56%|█████▋    | 1690/3000 [15:40<10:01,  2.18it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.54e+6]Epoch 1690/3000:  56%|█████▋    | 1690/3000 [15:40<10:01,  2.18it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1691/3000:  56%|█████▋    | 1690/3000 [15:40<10:01,  2.18it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1691/3000:  56%|█████▋    | 1691/3000 [15:40<09:45,  2.24it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1691/3000:  56%|█████▋    | 1691/3000 [15:40<09:45,  2.24it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.54e+6]Epoch 1692/3000:  56%|█████▋    | 1691/3000 [15:40<09:45,  2.24it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.54e+6]Epoch 1692/3000:  56%|█████▋    | 1692/3000 [15:41<10:01,  2.17it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.54e+6]Epoch 1692/3000:  56%|█████▋    | 1692/3000 [15:41<10:01,  2.17it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.54e+6]Epoch 1693/3000:  56%|█████▋    | 1692/3000 [15:41<10:01,  2.17it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.54e+6]Epoch 1693/3000:  56%|█████▋    | 1693/3000 [15:41<10:35,  2.06it/s, v_num=1, train_loss_step=1.59e+6, train_loss_epoch=1.54e+6]Epoch 1693/3000:  56%|█████▋    | 1693/3000 
[15:41<10:35,  2.06it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.54e+6]Epoch 1694/3000:  56%|█████▋    | 1693/3000 [15:41<10:35,  2.06it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.54e+6]Epoch 1694/3000:  56%|█████▋    | 1694/3000 [15:42<10:43,  2.03it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.54e+6]Epoch 1694/3000:  56%|█████▋    | 1694/3000 [15:42<10:43,  2.03it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1695/3000:  56%|█████▋    | 1694/3000 [15:42<10:43,  2.03it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1695/3000:  56%|█████▋    | 1695/3000 [15:42<10:17,  2.11it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.54e+6]Epoch 1695/3000:  56%|█████▋    | 1695/3000 [15:42<10:17,  2.11it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1696/3000:  56%|█████▋    | 1695/3000 [15:42<10:17,  2.11it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1696/3000:  57%|█████▋    | 1696/3000 [15:43<09:42,  2.24it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.54e+6]Epoch 1696/3000:  57%|█████▋    | 1696/3000 [15:43<09:42,  2.24it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1697/3000:  57%|█████▋    | 1696/3000 [15:43<09:42,  2.24it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1697/3000:  57%|█████▋    | 1697/3000 [15:43<09:38,  2.25it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.54e+6]Epoch 1697/3000:  57%|█████▋    | 1697/3000 [15:43<09:38,  2.25it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.54e+6]Epoch 1698/3000:  57%|█████▋    | 1697/3000 [15:43<09:38,  2.25it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.54e+6]Epoch 1698/3000:  57%|█████▋    | 1698/3000 [15:43<09:40,  2.24it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.54e+6]Epoch 1698/3000:  57%|█████▋    | 1698/3000 [15:43<09:40,  2.24it/s, v_num=1, train_loss_step=1.51e+6, 
train_loss_epoch=1.54e+6]Epoch 1699/3000:  57%|█████▋    | 1698/3000 [15:43<09:40,  2.24it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.54e+6]Epoch 1699/3000:  57%|█████▋    | 1699/3000 [15:44<09:37,  2.25it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.54e+6]Epoch 1699/3000:  57%|█████▋    | 1699/3000 [15:44<09:37,  2.25it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.54e+6]Epoch 1700/3000:  57%|█████▋    | 1699/3000 [15:44<09:37,  2.25it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.54e+6]Epoch 1700/3000:  57%|█████▋    | 1700/3000 [15:44<09:43,  2.23it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.54e+6]Epoch 1700/3000:  57%|█████▋    | 1700/3000 [15:44<09:43,  2.23it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.53e+6]Epoch 1701/3000:  57%|█████▋    | 1700/3000 [15:44<09:43,  2.23it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.53e+6]Epoch 1701/3000:  57%|█████▋    | 1701/3000 [15:45<09:39,  2.24it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.53e+6]Epoch 1701/3000:  57%|█████▋    | 1701/3000 [15:45<09:39,  2.24it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1702/3000:  57%|█████▋    | 1701/3000 [15:45<09:39,  2.24it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1702/3000:  57%|█████▋    | 1702/3000 [15:45<09:48,  2.21it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1702/3000:  57%|█████▋    | 1702/3000 [15:45<09:48,  2.21it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.53e+6] Epoch 1703/3000:  57%|█████▋    | 1702/3000 [15:45<09:48,  2.21it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.53e+6]Epoch 1703/3000:  57%|█████▋    | 1703/3000 [15:46<10:01,  2.16it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.53e+6]Epoch 1703/3000:  57%|█████▋    | 1703/3000 [15:46<10:01,  2.16it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.53e+6]Epoch 1704/3000:  57%|█████▋    | 1703/3000 [15:46<10:01, 
 2.16it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.53e+6]Epoch 1704/3000:  57%|█████▋    | 1704/3000 [15:46<09:34,  2.25it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.53e+6]Epoch 1704/3000:  57%|█████▋    | 1704/3000 [15:46<09:34,  2.25it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1705/3000:  57%|█████▋    | 1704/3000 [15:46<09:34,  2.25it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1705/3000:  57%|█████▋    | 1705/3000 [15:47<09:45,  2.21it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1705/3000:  57%|█████▋    | 1705/3000 [15:47<09:45,  2.21it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1706/3000:  57%|█████▋    | 1705/3000 [15:47<09:45,  2.21it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1706/3000:  57%|█████▋    | 1706/3000 [15:47<09:35,  2.25it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1706/3000:  57%|█████▋    | 1706/3000 [15:47<09:35,  2.25it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.53e+6]Epoch 1707/3000:  57%|█████▋    | 1706/3000 [15:47<09:35,  2.25it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.53e+6]Epoch 1707/3000:  57%|█████▋    | 1707/3000 [15:47<09:18,  2.31it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.53e+6]Epoch 1707/3000:  57%|█████▋    | 1707/3000 [15:47<09:18,  2.31it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.53e+6]Epoch 1708/3000:  57%|█████▋    | 1707/3000 [15:47<09:18,  2.31it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.53e+6]Epoch 1708/3000:  57%|█████▋    | 1708/3000 [15:48<09:15,  2.32it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.53e+6]Epoch 1708/3000:  57%|█████▋    | 1708/3000 [15:48<09:15,  2.32it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1709/3000:  57%|█████▋    | 1708/3000 [15:48<09:15,  2.32it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 
1709/3000:  57%|█████▋    | 1709/3000 [15:48<09:03,  2.38it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1709/3000:  57%|█████▋    | 1709/3000 [15:48<09:03,  2.38it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1710/3000:  57%|█████▋    | 1709/3000 [15:48<09:03,  2.38it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1710/3000:  57%|█████▋    | 1710/3000 [15:49<09:04,  2.37it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1710/3000:  57%|█████▋    | 1710/3000 [15:49<09:04,  2.37it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.53e+6]Epoch 1711/3000:  57%|█████▋    | 1710/3000 [15:49<09:04,  2.37it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.53e+6]Epoch 1711/3000:  57%|█████▋    | 1711/3000 [15:49<09:27,  2.27it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.53e+6]Epoch 1711/3000:  57%|█████▋    | 1711/3000 [15:49<09:27,  2.27it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.53e+6]Epoch 1712/3000:  57%|█████▋    | 1711/3000 [15:49<09:27,  2.27it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.53e+6]Epoch 1712/3000:  57%|█████▋    | 1712/3000 [15:50<09:31,  2.25it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.53e+6]Epoch 1712/3000:  57%|█████▋    | 1712/3000 [15:50<09:31,  2.25it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.53e+6]Epoch 1713/3000:  57%|█████▋    | 1712/3000 [15:50<09:31,  2.25it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.53e+6]Epoch 1713/3000:  57%|█████▋    | 1713/3000 [15:50<10:08,  2.11it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.53e+6]Epoch 1713/3000:  57%|█████▋    | 1713/3000 [15:50<10:08,  2.11it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1714/3000:  57%|█████▋    | 1713/3000 [15:50<10:08,  2.11it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1714/3000:  57%|█████▋    | 1714/3000 [15:51<09:54,  2.16it/s, v_num=1, 
train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1714/3000:  57%|█████▋    | 1714/3000 [15:51<09:54,  2.16it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1715/3000:  57%|█████▋    | 1714/3000 [15:51<09:54,  2.16it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1715/3000:  57%|█████▋    | 1715/3000 [15:51<10:17,  2.08it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1715/3000:  57%|█████▋    | 1715/3000 [15:51<10:17,  2.08it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1716/3000:  57%|█████▋    | 1715/3000 [15:51<10:17,  2.08it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1716/3000:  57%|█████▋    | 1716/3000 [15:52<09:46,  2.19it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1716/3000:  57%|█████▋    | 1716/3000 [15:52<09:46,  2.19it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.53e+6]Epoch 1717/3000:  57%|█████▋    | 1716/3000 [15:52<09:46,  2.19it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.53e+6]Epoch 1717/3000:  57%|█████▋    | 1717/3000 [15:52<09:45,  2.19it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.53e+6]Epoch 1717/3000:  57%|█████▋    | 1717/3000 [15:52<09:45,  2.19it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.53e+6]Epoch 1718/3000:  57%|█████▋    | 1717/3000 [15:52<09:45,  2.19it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.53e+6]Epoch 1718/3000:  57%|█████▋    | 1718/3000 [15:52<09:26,  2.26it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.53e+6]Epoch 1718/3000:  57%|█████▋    | 1718/3000 [15:52<09:26,  2.26it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.53e+6] Epoch 1719/3000:  57%|█████▋    | 1718/3000 [15:52<09:26,  2.26it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.53e+6]Epoch 1719/3000:  57%|█████▋    | 1719/3000 [15:53<09:22,  2.28it/s, v_num=1, train_loss_step=1.6e+6, train_loss_epoch=1.53e+6]Epoch 1719/3000:  57%|█████▋    
| 1719/3000 [15:53<09:22,  2.28it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1720/3000:  57%|█████▋    | 1719/3000 [15:53<09:22,  2.28it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1720/3000:  57%|█████▋    | 1720/3000 [15:53<09:22,  2.27it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1720/3000:  57%|█████▋    | 1720/3000 [15:53<09:22,  2.27it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.53e+6]Epoch 1721/3000:  57%|█████▋    | 1720/3000 [15:53<09:22,  2.27it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.53e+6]Epoch 1721/3000:  57%|█████▋    | 1721/3000 [15:54<08:47,  2.42it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.53e+6]Epoch 1721/3000:  57%|█████▋    | 1721/3000 [15:54<08:47,  2.42it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.53e+6]Epoch 1722/3000:  57%|█████▋    | 1721/3000 [15:54<08:47,  2.42it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.53e+6]Epoch 1722/3000:  57%|█████▋    | 1722/3000 [15:54<08:57,  2.38it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.53e+6]Epoch 1722/3000:  57%|█████▋    | 1722/3000 [15:54<08:57,  2.38it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1723/3000:  57%|█████▋    | 1722/3000 [15:54<08:57,  2.38it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1723/3000:  57%|█████▋    | 1723/3000 [15:54<08:40,  2.45it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1723/3000:  57%|█████▋    | 1723/3000 [15:54<08:40,  2.45it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1724/3000:  57%|█████▋    | 1723/3000 [15:54<08:40,  2.45it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1724/3000:  57%|█████▋    | 1724/3000 [15:55<08:40,  2.45it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1724/3000:  57%|█████▋    | 1724/3000 [15:55<08:40,  2.45it/s, v_num=1, train_loss_step=1.53e+6, 
train_loss_epoch=1.53e+6]Epoch 1725/3000:  57%|█████▋    | 1724/3000 [15:55<08:40,  2.45it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.53e+6]Epoch 1725/3000:  57%|█████▊    | 1725/3000 [15:55<08:59,  2.36it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.53e+6]Epoch 1725/3000:  57%|█████▊    | 1725/3000 [15:55<08:59,  2.36it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.53e+6]Epoch 1726/3000:  57%|█████▊    | 1725/3000 [15:55<08:59,  2.36it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.53e+6]Epoch 1726/3000:  58%|█████▊    | 1726/3000 [15:56<08:13,  2.58it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.53e+6]Epoch 1726/3000:  58%|█████▊    | 1726/3000 [15:56<08:13,  2.58it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1727/3000:  58%|█████▊    | 1726/3000 [15:56<08:13,  2.58it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1727/3000:  58%|█████▊    | 1727/3000 [15:56<08:52,  2.39it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1727/3000:  58%|█████▊    | 1727/3000 [15:56<08:52,  2.39it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1728/3000:  58%|█████▊    | 1727/3000 [15:56<08:52,  2.39it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1728/3000:  58%|█████▊    | 1728/3000 [15:57<09:20,  2.27it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1728/3000:  58%|█████▊    | 1728/3000 [15:57<09:20,  2.27it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.53e+6]Epoch 1729/3000:  58%|█████▊    | 1728/3000 [15:57<09:20,  2.27it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.53e+6]Epoch 1729/3000:  58%|█████▊    | 1729/3000 [15:57<09:19,  2.27it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.53e+6]Epoch 1729/3000:  58%|█████▊    | 1729/3000 [15:57<09:19,  2.27it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1730/3000:  58%|█████▊    | 1729/3000 
[15:57<09:19,  2.27it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1730/3000:  58%|█████▊    | 1730/3000 [15:58<09:22,  2.26it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1730/3000:  58%|█████▊    | 1730/3000 [15:58<09:22,  2.26it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1731/3000:  58%|█████▊    | 1730/3000 [15:58<09:22,  2.26it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1731/3000:  58%|█████▊    | 1731/3000 [15:58<09:35,  2.21it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1731/3000:  58%|█████▊    | 1731/3000 [15:58<09:35,  2.21it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1732/3000:  58%|█████▊    | 1731/3000 [15:58<09:35,  2.21it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1732/3000:  58%|█████▊    | 1732/3000 [15:58<08:59,  2.35it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1732/3000:  58%|█████▊    | 1732/3000 [15:58<08:59,  2.35it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1733/3000:  58%|█████▊    | 1732/3000 [15:58<08:59,  2.35it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1733/3000:  58%|█████▊    | 1733/3000 [15:59<08:50,  2.39it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1733/3000:  58%|█████▊    | 1733/3000 [15:59<08:50,  2.39it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1734/3000:  58%|█████▊    | 1733/3000 [15:59<08:50,  2.39it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1734/3000:  58%|█████▊    | 1734/3000 [15:59<09:05,  2.32it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1734/3000:  58%|█████▊    | 1734/3000 [15:59<09:05,  2.32it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1735/3000:  58%|█████▊    | 1734/3000 [15:59<09:05,  2.32it/s, v_num=1, train_loss_step=1.54e+6, 
train_loss_epoch=1.53e+6]Epoch 1735/3000:  58%|█████▊    | 1735/3000 [16:00<08:56,  2.36it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1735/3000:  58%|█████▊    | 1735/3000 [16:00<08:56,  2.36it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1736/3000:  58%|█████▊    | 1735/3000 [16:00<08:56,  2.36it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1736/3000:  58%|█████▊    | 1736/3000 [16:00<09:03,  2.32it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1736/3000:  58%|█████▊    | 1736/3000 [16:00<09:03,  2.32it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1737/3000:  58%|█████▊    | 1736/3000 [16:00<09:03,  2.32it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1737/3000:  58%|█████▊    | 1737/3000 [16:00<08:57,  2.35it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1737/3000:  58%|█████▊    | 1737/3000 [16:00<08:57,  2.35it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1738/3000:  58%|█████▊    | 1737/3000 [16:00<08:57,  2.35it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1738/3000:  58%|█████▊    | 1738/3000 [16:01<09:04,  2.32it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1738/3000:  58%|█████▊    | 1738/3000 [16:01<09:04,  2.32it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.53e+6] Epoch 1739/3000:  58%|█████▊    | 1738/3000 [16:01<09:04,  2.32it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.53e+6]Epoch 1739/3000:  58%|█████▊    | 1739/3000 [16:01<09:05,  2.31it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.53e+6]Epoch 1739/3000:  58%|█████▊    | 1739/3000 [16:01<09:05,  2.31it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.53e+6]Epoch 1740/3000:  58%|█████▊    | 1739/3000 [16:01<09:05,  2.31it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.53e+6]Epoch 1740/3000:  58%|█████▊    | 1740/3000 [16:02<09:19,  
2.25it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.53e+6]Epoch 1740/3000:  58%|█████▊    | 1740/3000 [16:02<09:19,  2.25it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1741/3000:  58%|█████▊    | 1740/3000 [16:02<09:19,  2.25it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1741/3000:  58%|█████▊    | 1741/3000 [16:02<09:31,  2.20it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1741/3000:  58%|█████▊    | 1741/3000 [16:02<09:31,  2.20it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.53e+6]Epoch 1742/3000:  58%|█████▊    | 1741/3000 [16:02<09:31,  2.20it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.53e+6]Epoch 1742/3000:  58%|█████▊    | 1742/3000 [16:03<09:12,  2.28it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.53e+6]Epoch 1742/3000:  58%|█████▊    | 1742/3000 [16:03<09:12,  2.28it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1743/3000:  58%|█████▊    | 1742/3000 [16:03<09:12,  2.28it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1743/3000:  58%|█████▊    | 1743/3000 [16:03<08:58,  2.34it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1743/3000:  58%|█████▊    | 1743/3000 [16:03<08:58,  2.34it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.53e+6]Epoch 1744/3000:  58%|█████▊    | 1743/3000 [16:03<08:58,  2.34it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.53e+6]Epoch 1744/3000:  58%|█████▊    | 1744/3000 [16:04<08:51,  2.36it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.53e+6]Epoch 1744/3000:  58%|█████▊    | 1744/3000 [16:04<08:51,  2.36it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1745/3000:  58%|█████▊    | 1744/3000 [16:04<08:51,  2.36it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 1745/3000:  58%|█████▊    | 1745/3000 [16:04<09:18,  2.25it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.53e+6]Epoch 
1745/3000:  58%|█████▊    | 1745/3000 [16:04<09:18,  2.25it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1746/3000:  58%|█████▊    | 1745/3000 [16:04<09:18,  2.25it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1746/3000:  58%|█████▊    | 1746/3000 [16:04<09:19,  2.24it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1746/3000:  58%|█████▊    | 1746/3000 [16:04<09:19,  2.24it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1747/3000:  58%|█████▊    | 1746/3000 [16:04<09:19,  2.24it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1747/3000:  58%|█████▊    | 1747/3000 [16:05<09:18,  2.24it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.53e+6]Epoch 1747/3000:  58%|█████▊    | 1747/3000 [16:05<09:18,  2.24it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.53e+6] Epoch 1748/3000:  58%|█████▊    | 1747/3000 [16:05<09:18,  2.24it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.53e+6]Epoch 1748/3000:  58%|█████▊    | 1748/3000 [16:05<08:57,  2.33it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.53e+6]Epoch 1748/3000:  58%|█████▊    | 1748/3000 [16:05<08:57,  2.33it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.53e+6]Epoch 1749/3000:  58%|█████▊    | 1748/3000 [16:05<08:57,  2.33it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.53e+6]Epoch 1749/3000:  58%|█████▊    | 1749/3000 [16:06<09:22,  2.22it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.53e+6]Epoch 1749/3000:  58%|█████▊    | 1749/3000 [16:06<09:22,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.53e+6]Epoch 1750/3000:  58%|█████▊    | 1749/3000 [16:06<09:22,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.53e+6]Epoch 1750/3000:  58%|█████▊    | 1750/3000 [16:06<09:42,  2.15it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.53e+6]Epoch 1750/3000:  58%|█████▊    | 1750/3000 [16:06<09:42,  2.15it/s, v_num=1, 
train_loss_step=1.56e+6, train_loss_epoch=1.53e+6]Epoch 1751/3000:  58%|█████▊    | 1750/3000 [16:06<09:42,  2.15it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.53e+6]Epoch 1751/3000:  58%|█████▊    | 1751/3000 [16:07<09:55,  2.10it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.53e+6]Epoch 1751/3000:  58%|█████▊    | 1751/3000 [16:07<09:55,  2.10it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1752/3000:  58%|█████▊    | 1751/3000 [16:07<09:55,  2.10it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1752/3000:  58%|█████▊    | 1752/3000 [16:07<09:44,  2.13it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.53e+6]Epoch 1752/3000:  58%|█████▊    | 1752/3000 [16:07<09:44,  2.13it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.53e+6]Epoch 1753/3000:  58%|█████▊    | 1752/3000 [16:07<09:44,  2.13it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.53e+6]Epoch 1753/3000:  58%|█████▊    | 1753/3000 [16:08<10:07,  2.05it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.53e+6]Epoch 1753/3000:  58%|█████▊    | 1753/3000 [16:08<10:07,  2.05it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1754/3000:  58%|█████▊    | 1753/3000 [16:08<10:07,  2.05it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1754/3000:  58%|█████▊    | 1754/3000 [16:08<09:52,  2.10it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1754/3000:  58%|█████▊    | 1754/3000 [16:08<09:52,  2.10it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.52e+6]Epoch 1755/3000:  58%|█████▊    | 1754/3000 [16:08<09:52,  2.10it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.52e+6]Epoch 1755/3000:  58%|█████▊    | 1755/3000 [16:09<09:26,  2.20it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.52e+6]Epoch 1755/3000:  58%|█████▊    | 1755/3000 [16:09<09:26,  2.20it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1756/3000:  58%|█████▊  
  | 1755/3000 [16:09<09:26,  2.20it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1756/3000:  59%|█████▊    | 1756/3000 [16:09<09:28,  2.19it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1756/3000:  59%|█████▊    | 1756/3000 [16:09<09:28,  2.19it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.52e+6]Epoch 1757/3000:  59%|█████▊    | 1756/3000 [16:09<09:28,  2.19it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.52e+6]Epoch 1757/3000:  59%|█████▊    | 1757/3000 [16:09<09:01,  2.29it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.52e+6]Epoch 1757/3000:  59%|█████▊    | 1757/3000 [16:09<09:01,  2.29it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.52e+6]Epoch 1758/3000:  59%|█████▊    | 1757/3000 [16:09<09:01,  2.29it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.52e+6]Epoch 1758/3000:  59%|█████▊    | 1758/3000 [16:10<09:09,  2.26it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.52e+6]Epoch 1758/3000:  59%|█████▊    | 1758/3000 [16:10<09:09,  2.26it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6] Epoch 1759/3000:  59%|█████▊    | 1758/3000 [16:10<09:09,  2.26it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1759/3000:  59%|█████▊    | 1759/3000 [16:10<09:21,  2.21it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1759/3000:  59%|█████▊    | 1759/3000 [16:10<09:21,  2.21it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.52e+6]Epoch 1760/3000:  59%|█████▊    | 1759/3000 [16:10<09:21,  2.21it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.52e+6]Epoch 1760/3000:  59%|█████▊    | 1760/3000 [16:11<09:38,  2.14it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.52e+6]Epoch 1760/3000:  59%|█████▊    | 1760/3000 [16:11<09:38,  2.14it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.52e+6]Epoch 1761/3000:  59%|█████▊    | 1760/3000 [16:11<09:38,  2.14it/s, v_num=1, train_loss_step=1.49e+6, 
train_loss_epoch=1.52e+6]Epoch 1761/3000:  59%|█████▊    | 1761/3000 [16:11<09:28,  2.18it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.52e+6]Epoch 1761/3000:  59%|█████▊    | 1761/3000 [16:11<09:28,  2.18it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.52e+6]Epoch 1762/3000:  59%|█████▊    | 1761/3000 [16:11<09:28,  2.18it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.52e+6]Epoch 1762/3000:  59%|█████▊    | 1762/3000 [16:12<09:39,  2.14it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.52e+6]Epoch 1762/3000:  59%|█████▊    | 1762/3000 [16:12<09:39,  2.14it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1763/3000:  59%|█████▊    | 1762/3000 [16:12<09:39,  2.14it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1763/3000:  59%|█████▉    | 1763/3000 [16:12<09:33,  2.16it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1763/3000:  59%|█████▉    | 1763/3000 [16:12<09:33,  2.16it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6] Epoch 1764/3000:  59%|█████▉    | 1763/3000 [16:12<09:33,  2.16it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1764/3000:  59%|█████▉    | 1764/3000 [16:13<09:25,  2.19it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1764/3000:  59%|█████▉    | 1764/3000 [16:13<09:25,  2.19it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1765/3000:  59%|█████▉    | 1764/3000 [16:13<09:25,  2.19it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1765/3000:  59%|█████▉    | 1765/3000 [16:13<09:08,  2.25it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1765/3000:  59%|█████▉    | 1765/3000 [16:13<09:08,  2.25it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1766/3000:  59%|█████▉    | 1765/3000 [16:13<09:08,  2.25it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1766/3000:  59%|█████▉    | 1766/3000 
[16:14<09:06,  2.26it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1766/3000:  59%|█████▉    | 1766/3000 [16:14<09:06,  2.26it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.52e+6]Epoch 1767/3000:  59%|█████▉    | 1766/3000 [16:14<09:06,  2.26it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.52e+6]Epoch 1767/3000:  59%|█████▉    | 1767/3000 [16:14<08:43,  2.36it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.52e+6]Epoch 1767/3000:  59%|█████▉    | 1767/3000 [16:14<08:43,  2.36it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.52e+6]Epoch 1768/3000:  59%|█████▉    | 1767/3000 [16:14<08:43,  2.36it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.52e+6]Epoch 1768/3000:  59%|█████▉    | 1768/3000 [16:14<08:16,  2.48it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.52e+6]Epoch 1768/3000:  59%|█████▉    | 1768/3000 [16:14<08:16,  2.48it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1769/3000:  59%|█████▉    | 1768/3000 [16:14<08:16,  2.48it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1769/3000:  59%|█████▉    | 1769/3000 [16:15<08:28,  2.42it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1769/3000:  59%|█████▉    | 1769/3000 [16:15<08:28,  2.42it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.52e+6]Epoch 1770/3000:  59%|█████▉    | 1769/3000 [16:15<08:28,  2.42it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.52e+6]Epoch 1770/3000:  59%|█████▉    | 1770/3000 [16:15<08:31,  2.40it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.52e+6]Epoch 1770/3000:  59%|█████▉    | 1770/3000 [16:15<08:31,  2.40it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.52e+6]Epoch 1771/3000:  59%|█████▉    | 1770/3000 [16:15<08:31,  2.40it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.52e+6]Epoch 1771/3000:  59%|█████▉    | 1771/3000 [16:16<08:33,  2.39it/s, v_num=1, train_loss_step=1.57e+6, 
train_loss_epoch=1.52e+6]Epoch 1771/3000:  59%|█████▉    | 1771/3000 [16:16<08:33,  2.39it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.52e+6]Epoch 1772/3000:  59%|█████▉    | 1771/3000 [16:16<08:33,  2.39it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.52e+6]Epoch 1772/3000:  59%|█████▉    | 1772/3000 [16:16<08:37,  2.37it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.52e+6]Epoch 1772/3000:  59%|█████▉    | 1772/3000 [16:16<08:37,  2.37it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1773/3000:  59%|█████▉    | 1772/3000 [16:16<08:37,  2.37it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1773/3000:  59%|█████▉    | 1773/3000 [16:17<09:04,  2.25it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1773/3000:  59%|█████▉    | 1773/3000 [16:17<09:04,  2.25it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6] Epoch 1774/3000:  59%|█████▉    | 1773/3000 [16:17<09:04,  2.25it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1774/3000:  59%|█████▉    | 1774/3000 [16:17<08:59,  2.27it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1774/3000:  59%|█████▉    | 1774/3000 [16:17<08:59,  2.27it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.52e+6]Epoch 1775/3000:  59%|█████▉    | 1774/3000 [16:17<08:59,  2.27it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.52e+6]Epoch 1775/3000:  59%|█████▉    | 1775/3000 [16:17<08:36,  2.37it/s, v_num=1, train_loss_step=1.58e+6, train_loss_epoch=1.52e+6]Epoch 1775/3000:  59%|█████▉    | 1775/3000 [16:17<08:36,  2.37it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1776/3000:  59%|█████▉    | 1775/3000 [16:17<08:36,  2.37it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1776/3000:  59%|█████▉    | 1776/3000 [16:18<08:47,  2.32it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1776/3000:  59%|█████▉    | 1776/3000 
[16:18<08:47,  2.32it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.52e+6]Epoch 1777/3000:  59%|█████▉    | 1776/3000 [16:18<08:47,  2.32it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.52e+6]Epoch 1777/3000:  59%|█████▉    | 1777/3000 [16:18<08:51,  2.30it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.52e+6]Epoch 1777/3000:  59%|█████▉    | 1777/3000 [16:18<08:51,  2.30it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1778/3000:  59%|█████▉    | 1777/3000 [16:18<08:51,  2.30it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1778/3000:  59%|█████▉    | 1778/3000 [16:19<08:58,  2.27it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1778/3000:  59%|█████▉    | 1778/3000 [16:19<08:58,  2.27it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.52e+6]Epoch 1779/3000:  59%|█████▉    | 1778/3000 [16:19<08:58,  2.27it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.52e+6]Epoch 1779/3000:  59%|█████▉    | 1779/3000 [16:19<08:47,  2.31it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.52e+6]Epoch 1779/3000:  59%|█████▉    | 1779/3000 [16:19<08:47,  2.31it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1780/3000:  59%|█████▉    | 1779/3000 [16:19<08:47,  2.31it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1780/3000:  59%|█████▉    | 1780/3000 [16:20<08:49,  2.30it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1780/3000:  59%|█████▉    | 1780/3000 [16:20<08:49,  2.30it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.52e+6]Epoch 1781/3000:  59%|█████▉    | 1780/3000 [16:20<08:49,  2.30it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.52e+6]Epoch 1781/3000:  59%|█████▉    | 1781/3000 [16:20<08:56,  2.27it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.52e+6]Epoch 1781/3000:  59%|█████▉    | 1781/3000 [16:20<08:56,  2.27it/s, v_num=1, train_loss_step=1.52e+6, 
train_loss_epoch=1.52e+6]Epoch 1782/3000:  59%|█████▉    | 1781/3000 [16:20<08:56,  2.27it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.52e+6]Epoch 1782/3000:  59%|█████▉    | 1782/3000 [16:20<09:02,  2.25it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.52e+6]Epoch 1782/3000:  59%|█████▉    | 1782/3000 [16:20<09:02,  2.25it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.52e+6]Epoch 1783/3000:  59%|█████▉    | 1782/3000 [16:20<09:02,  2.25it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.52e+6]Epoch 1783/3000:  59%|█████▉    | 1783/3000 [16:21<09:01,  2.25it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.52e+6]Epoch 1783/3000:  59%|█████▉    | 1783/3000 [16:21<09:01,  2.25it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.52e+6]Epoch 1784/3000:  59%|█████▉    | 1783/3000 [16:21<09:01,  2.25it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.52e+6]Epoch 1784/3000:  59%|█████▉    | 1784/3000 [16:21<09:00,  2.25it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.52e+6]Epoch 1784/3000:  59%|█████▉    | 1784/3000 [16:21<09:00,  2.25it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1785/3000:  59%|█████▉    | 1784/3000 [16:21<09:00,  2.25it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1785/3000:  60%|█████▉    | 1785/3000 [16:22<09:03,  2.24it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1785/3000:  60%|█████▉    | 1785/3000 [16:22<09:03,  2.24it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.52e+6]Epoch 1786/3000:  60%|█████▉    | 1785/3000 [16:22<09:03,  2.24it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.52e+6]Epoch 1786/3000:  60%|█████▉    | 1786/3000 [16:22<09:16,  2.18it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.52e+6]Epoch 1786/3000:  60%|█████▉    | 1786/3000 [16:22<09:16,  2.18it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.52e+6]Epoch 1787/3000:  60%|█████▉    | 1786/3000 
[16:22<09:16,  2.18it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.52e+6]Epoch 1787/3000:  60%|█████▉    | 1787/3000 [16:23<09:35,  2.11it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.52e+6]Epoch 1787/3000:  60%|█████▉    | 1787/3000 [16:23<09:35,  2.11it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6] Epoch 1788/3000:  60%|█████▉    | 1787/3000 [16:23<09:35,  2.11it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1788/3000:  60%|█████▉    | 1788/3000 [16:23<09:36,  2.10it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1788/3000:  60%|█████▉    | 1788/3000 [16:23<09:36,  2.10it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1789/3000:  60%|█████▉    | 1788/3000 [16:23<09:36,  2.10it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1789/3000:  60%|█████▉    | 1789/3000 [16:24<09:23,  2.15it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1789/3000:  60%|█████▉    | 1789/3000 [16:24<09:23,  2.15it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1790/3000:  60%|█████▉    | 1789/3000 [16:24<09:23,  2.15it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1790/3000:  60%|█████▉    | 1790/3000 [16:24<09:06,  2.21it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1790/3000:  60%|█████▉    | 1790/3000 [16:24<09:06,  2.21it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.52e+6]Epoch 1791/3000:  60%|█████▉    | 1790/3000 [16:24<09:06,  2.21it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.52e+6]Epoch 1791/3000:  60%|█████▉    | 1791/3000 [16:24<08:20,  2.42it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.52e+6]Epoch 1791/3000:  60%|█████▉    | 1791/3000 [16:24<08:20,  2.42it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.52e+6]Epoch 1792/3000:  60%|█████▉    | 1791/3000 [16:24<08:20,  2.42it/s, v_num=1, train_loss_step=1.49e+6, 
train_loss_epoch=1.52e+6]Epoch 1792/3000:  60%|█████▉    | 1792/3000 [16:25<07:59,  2.52it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.52e+6]Epoch 1792/3000:  60%|█████▉    | 1792/3000 [16:25<07:59,  2.52it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.52e+6]Epoch 1793/3000:  60%|█████▉    | 1792/3000 [16:25<07:59,  2.52it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.52e+6]Epoch 1793/3000:  60%|█████▉    | 1793/3000 [16:25<07:34,  2.66it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.52e+6]Epoch 1793/3000:  60%|█████▉    | 1793/3000 [16:25<07:34,  2.66it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.52e+6]Epoch 1794/3000:  60%|█████▉    | 1793/3000 [16:25<07:34,  2.66it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.52e+6]Epoch 1794/3000:  60%|█████▉    | 1794/3000 [16:26<07:53,  2.55it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.52e+6]Epoch 1794/3000:  60%|█████▉    | 1794/3000 [16:26<07:53,  2.55it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1795/3000:  60%|█████▉    | 1794/3000 [16:26<07:53,  2.55it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1795/3000:  60%|█████▉    | 1795/3000 [16:26<08:31,  2.36it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1795/3000:  60%|█████▉    | 1795/3000 [16:26<08:31,  2.36it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.52e+6]Epoch 1796/3000:  60%|█████▉    | 1795/3000 [16:26<08:31,  2.36it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.52e+6]Epoch 1796/3000:  60%|█████▉    | 1796/3000 [16:27<08:32,  2.35it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.52e+6]Epoch 1796/3000:  60%|█████▉    | 1796/3000 [16:27<08:32,  2.35it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.52e+6]Epoch 1797/3000:  60%|█████▉    | 1796/3000 [16:27<08:32,  2.35it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.52e+6]Epoch 1797/3000:  60%|█████▉    | 1797/3000 
[16:27<08:40,  2.31it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.52e+6]Epoch 1797/3000:  60%|█████▉    | 1797/3000 [16:27<08:40,  2.31it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.52e+6]Epoch 1798/3000:  60%|█████▉    | 1797/3000 [16:27<08:40,  2.31it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.52e+6]Epoch 1798/3000:  60%|█████▉    | 1798/3000 [16:27<08:50,  2.27it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.52e+6]Epoch 1798/3000:  60%|█████▉    | 1798/3000 [16:27<08:50,  2.27it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6] Epoch 1799/3000:  60%|█████▉    | 1798/3000 [16:27<08:50,  2.27it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1799/3000:  60%|█████▉    | 1799/3000 [16:28<09:04,  2.21it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1799/3000:  60%|█████▉    | 1799/3000 [16:28<09:04,  2.21it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.52e+6]Epoch 1800/3000:  60%|█████▉    | 1799/3000 [16:28<09:04,  2.21it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.52e+6]Epoch 1800/3000:  60%|██████    | 1800/3000 [16:28<07:58,  2.51it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.52e+6]Epoch 1800/3000:  60%|██████    | 1800/3000 [16:28<07:58,  2.51it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6] Epoch 1801/3000:  60%|██████    | 1800/3000 [16:28<07:58,  2.51it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1801/3000:  60%|██████    | 1801/3000 [16:29<08:10,  2.44it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1801/3000:  60%|██████    | 1801/3000 [16:29<08:10,  2.44it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.52e+6]Epoch 1802/3000:  60%|██████    | 1801/3000 [16:29<08:10,  2.44it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.52e+6]Epoch 1802/3000:  60%|██████    | 1802/3000 [16:29<08:29,  2.35it/s, v_num=1, train_loss_step=1.48e+6, 
train_loss_epoch=1.52e+6]Epoch 1802/3000:  60%|██████    | 1802/3000 [16:29<08:29,  2.35it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.52e+6]Epoch 1803/3000:  60%|██████    | 1802/3000 [16:29<08:29,  2.35it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.52e+6]Epoch 1803/3000:  60%|██████    | 1803/3000 [16:29<08:27,  2.36it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.52e+6]Epoch 1803/3000:  60%|██████    | 1803/3000 [16:29<08:27,  2.36it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.52e+6]Epoch 1804/3000:  60%|██████    | 1803/3000 [16:29<08:27,  2.36it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.52e+6]Epoch 1804/3000:  60%|██████    | 1804/3000 [16:30<08:58,  2.22it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.52e+6]Epoch 1804/3000:  60%|██████    | 1804/3000 [16:30<08:58,  2.22it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.52e+6]Epoch 1805/3000:  60%|██████    | 1804/3000 [16:30<08:58,  2.22it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.52e+6]Epoch 1805/3000:  60%|██████    | 1805/3000 [16:30<08:59,  2.21it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.52e+6]Epoch 1805/3000:  60%|██████    | 1805/3000 [16:30<08:59,  2.21it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6] Epoch 1806/3000:  60%|██████    | 1805/3000 [16:30<08:59,  2.21it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1806/3000:  60%|██████    | 1806/3000 [16:31<09:00,  2.21it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1806/3000:  60%|██████    | 1806/3000 [16:31<09:00,  2.21it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1807/3000:  60%|██████    | 1806/3000 [16:31<09:00,  2.21it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1807/3000:  60%|██████    | 1807/3000 [16:31<08:46,  2.27it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1807/3000:  60%|██████    | 1807/3000 
[16:31<08:46,  2.27it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.52e+6]Epoch 1808/3000:  60%|██████    | 1807/3000 [16:31<08:46,  2.27it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.52e+6]Epoch 1808/3000:  60%|██████    | 1808/3000 [16:32<08:40,  2.29it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.52e+6]Epoch 1808/3000:  60%|██████    | 1808/3000 [16:32<08:40,  2.29it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6] Epoch 1809/3000:  60%|██████    | 1808/3000 [16:32<08:40,  2.29it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1809/3000:  60%|██████    | 1809/3000 [16:32<08:46,  2.26it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.52e+6]Epoch 1809/3000:  60%|██████    | 1809/3000 [16:32<08:46,  2.26it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1810/3000:  60%|██████    | 1809/3000 [16:32<08:46,  2.26it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1810/3000:  60%|██████    | 1810/3000 [16:33<08:38,  2.29it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.52e+6]Epoch 1810/3000:  60%|██████    | 1810/3000 [16:33<08:38,  2.29it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1811/3000:  60%|██████    | 1810/3000 [16:33<08:38,  2.29it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1811/3000:  60%|██████    | 1811/3000 [16:33<08:37,  2.30it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.52e+6]Epoch 1811/3000:  60%|██████    | 1811/3000 [16:33<08:37,  2.30it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.52e+6]Epoch 1812/3000:  60%|██████    | 1811/3000 [16:33<08:37,  2.30it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.52e+6]Epoch 1812/3000:  60%|██████    | 1812/3000 [16:34<08:50,  2.24it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.52e+6]Epoch 1812/3000:  60%|██████    | 1812/3000 [16:34<08:50,  2.24it/s, v_num=1, train_loss_step=1.49e+6, 
train_loss_epoch=1.51e+6]Epoch 1813/3000:  60%|██████    | 1812/3000 [16:34<08:50,  2.24it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1813/3000:  60%|██████    | 1813/3000 [16:34<08:34,  2.31it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1813/3000:  60%|██████    | 1813/3000 [16:34<08:34,  2.31it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1814/3000:  60%|██████    | 1813/3000 [16:34<08:34,  2.31it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1814/3000:  60%|██████    | 1814/3000 [16:34<08:21,  2.37it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1814/3000:  60%|██████    | 1814/3000 [16:34<08:21,  2.37it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1815/3000:  60%|██████    | 1814/3000 [16:34<08:21,  2.37it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1815/3000:  60%|██████    | 1815/3000 [16:35<08:31,  2.32it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1815/3000:  60%|██████    | 1815/3000 [16:35<08:31,  2.32it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.51e+6]Epoch 1816/3000:  60%|██████    | 1815/3000 [16:35<08:31,  2.32it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.51e+6]Epoch 1816/3000:  61%|██████    | 1816/3000 [16:35<08:17,  2.38it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.51e+6]Epoch 1816/3000:  61%|██████    | 1816/3000 [16:35<08:17,  2.38it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1817/3000:  61%|██████    | 1816/3000 [16:35<08:17,  2.38it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1817/3000:  61%|██████    | 1817/3000 [16:36<08:15,  2.39it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1817/3000:  61%|██████    | 1817/3000 [16:36<08:15,  2.39it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1818/3000:  61%|██████    | 1817/3000 
[16:36<08:15,  2.39it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1818/3000:  61%|██████    | 1818/3000 [16:36<07:55,  2.48it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1818/3000:  61%|██████    | 1818/3000 [16:36<07:55,  2.48it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1819/3000:  61%|██████    | 1818/3000 [16:36<07:55,  2.48it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1819/3000:  61%|██████    | 1819/3000 [16:36<08:12,  2.40it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1819/3000:  61%|██████    | 1819/3000 [16:36<08:12,  2.40it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.51e+6]Epoch 1820/3000:  61%|██████    | 1819/3000 [16:36<08:12,  2.40it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.51e+6]Epoch 1820/3000:  61%|██████    | 1820/3000 [16:37<08:16,  2.38it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.51e+6]Epoch 1820/3000:  61%|██████    | 1820/3000 [16:37<08:16,  2.38it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1821/3000:  61%|██████    | 1820/3000 [16:37<08:16,  2.38it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1821/3000:  61%|██████    | 1821/3000 [16:37<08:33,  2.29it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1821/3000:  61%|██████    | 1821/3000 [16:37<08:33,  2.29it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1822/3000:  61%|██████    | 1821/3000 [16:37<08:33,  2.29it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1822/3000:  61%|██████    | 1822/3000 [16:38<07:56,  2.47it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1822/3000:  61%|██████    | 1822/3000 [16:38<07:56,  2.47it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1823/3000:  61%|██████    | 1822/3000 [16:38<07:56,  2.47it/s, v_num=1, train_loss_step=1.48e+6, 
train_loss_epoch=1.51e+6]Epoch 1823/3000:  61%|██████    | 1823/3000 [16:38<08:12,  2.39it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1823/3000:  61%|██████    | 1823/3000 [16:38<08:12,  2.39it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1824/3000:  61%|██████    | 1823/3000 [16:38<08:12,  2.39it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1824/3000:  61%|██████    | 1824/3000 [16:39<08:32,  2.30it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1824/3000:  61%|██████    | 1824/3000 [16:39<08:32,  2.30it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.51e+6]Epoch 1825/3000:  61%|██████    | 1824/3000 [16:39<08:32,  2.30it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.51e+6]Epoch 1825/3000:  61%|██████    | 1825/3000 [16:39<08:51,  2.21it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.51e+6]Epoch 1825/3000:  61%|██████    | 1825/3000 [16:39<08:51,  2.21it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.51e+6]Epoch 1826/3000:  61%|██████    | 1825/3000 [16:39<08:51,  2.21it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.51e+6]Epoch 1826/3000:  61%|██████    | 1826/3000 [16:39<08:37,  2.27it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.51e+6]Epoch 1826/3000:  61%|██████    | 1826/3000 [16:39<08:37,  2.27it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6] Epoch 1827/3000:  61%|██████    | 1826/3000 [16:39<08:37,  2.27it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1827/3000:  61%|██████    | 1827/3000 [16:40<08:02,  2.43it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1827/3000:  61%|██████    | 1827/3000 [16:40<08:02,  2.43it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1828/3000:  61%|██████    | 1827/3000 [16:40<08:02,  2.43it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1828/3000:  61%|██████    | 1828/3000 
[16:40<07:58,  2.45it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1828/3000:  61%|██████    | 1828/3000 [16:40<07:58,  2.45it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.51e+6]Epoch 1829/3000:  61%|██████    | 1828/3000 [16:40<07:58,  2.45it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.51e+6]Epoch 1829/3000:  61%|██████    | 1829/3000 [16:41<07:52,  2.48it/s, v_num=1, train_loss_step=1.57e+6, train_loss_epoch=1.51e+6]Epoch 1829/3000:  61%|██████    | 1829/3000 [16:41<07:52,  2.48it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.51e+6]Epoch 1830/3000:  61%|██████    | 1829/3000 [16:41<07:52,  2.48it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.51e+6]Epoch 1830/3000:  61%|██████    | 1830/3000 [16:41<07:54,  2.47it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.51e+6]Epoch 1830/3000:  61%|██████    | 1830/3000 [16:41<07:54,  2.47it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.51e+6]Epoch 1831/3000:  61%|██████    | 1830/3000 [16:41<07:54,  2.47it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.51e+6]Epoch 1831/3000:  61%|██████    | 1831/3000 [16:41<07:32,  2.59it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.51e+6]Epoch 1831/3000:  61%|██████    | 1831/3000 [16:41<07:32,  2.59it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1832/3000:  61%|██████    | 1831/3000 [16:41<07:32,  2.59it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1832/3000:  61%|██████    | 1832/3000 [16:42<08:02,  2.42it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1832/3000:  61%|██████    | 1832/3000 [16:42<08:02,  2.42it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.51e+6]Epoch 1833/3000:  61%|██████    | 1832/3000 [16:42<08:02,  2.42it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.51e+6]Epoch 1833/3000:  61%|██████    | 1833/3000 [16:42<08:03,  2.41it/s, v_num=1, train_loss_step=1.47e+6, 
train_loss_epoch=1.51e+6]Epoch 1833/3000:  61%|██████    | 1833/3000 [16:42<08:03,  2.41it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1834/3000:  61%|██████    | 1833/3000 [16:42<08:03,  2.41it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1834/3000:  61%|██████    | 1834/3000 [16:43<08:09,  2.38it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1834/3000:  61%|██████    | 1834/3000 [16:43<08:09,  2.38it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1835/3000:  61%|██████    | 1834/3000 [16:43<08:09,  2.38it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1835/3000:  61%|██████    | 1835/3000 [16:43<08:06,  2.39it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1835/3000:  61%|██████    | 1835/3000 [16:43<08:06,  2.39it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1836/3000:  61%|██████    | 1835/3000 [16:43<08:06,  2.39it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1836/3000:  61%|██████    | 1836/3000 [16:44<08:32,  2.27it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1836/3000:  61%|██████    | 1836/3000 [16:44<08:32,  2.27it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.51e+6]Epoch 1837/3000:  61%|██████    | 1836/3000 [16:44<08:32,  2.27it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.51e+6]Epoch 1837/3000:  61%|██████    | 1837/3000 [16:44<08:29,  2.28it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.51e+6]Epoch 1837/3000:  61%|██████    | 1837/3000 [16:44<08:29,  2.28it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1838/3000:  61%|██████    | 1837/3000 [16:44<08:29,  2.28it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1838/3000:  61%|██████▏   | 1838/3000 [16:44<08:33,  2.26it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1838/3000:  61%|██████▏   | 1838/3000 
[16:44<08:33,  2.26it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1839/3000:  61%|██████▏   | 1838/3000 [16:44<08:33,  2.26it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1839/3000:  61%|██████▏   | 1839/3000 [16:45<08:12,  2.36it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1839/3000:  61%|██████▏   | 1839/3000 [16:45<08:12,  2.36it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6] Epoch 1840/3000:  61%|██████▏   | 1839/3000 [16:45<08:12,  2.36it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1840/3000:  61%|██████▏   | 1840/3000 [16:45<08:30,  2.27it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1840/3000:  61%|██████▏   | 1840/3000 [16:45<08:30,  2.27it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1841/3000:  61%|██████▏   | 1840/3000 [16:45<08:30,  2.27it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1841/3000:  61%|██████▏   | 1841/3000 [16:46<08:26,  2.29it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1841/3000:  61%|██████▏   | 1841/3000 [16:46<08:26,  2.29it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.51e+6]Epoch 1842/3000:  61%|██████▏   | 1841/3000 [16:46<08:26,  2.29it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.51e+6]Epoch 1842/3000:  61%|██████▏   | 1842/3000 [16:46<08:46,  2.20it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.51e+6]Epoch 1842/3000:  61%|██████▏   | 1842/3000 [16:46<08:46,  2.20it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1843/3000:  61%|██████▏   | 1842/3000 [16:46<08:46,  2.20it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1843/3000:  61%|██████▏   | 1843/3000 [16:47<08:20,  2.31it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1843/3000:  61%|██████▏   | 1843/3000 [16:47<08:20,  2.31it/s, v_num=1, train_loss_step=1.51e+6, 
train_loss_epoch=1.51e+6]Epoch 1844/3000:  61%|██████▏   | 1843/3000 [16:47<08:20,  2.31it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1844/3000:  61%|██████▏   | 1844/3000 [16:47<07:53,  2.44it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1844/3000:  61%|██████▏   | 1844/3000 [16:47<07:53,  2.44it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1845/3000:  61%|██████▏   | 1844/3000 [16:47<07:53,  2.44it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1845/3000:  62%|██████▏   | 1845/3000 [16:48<08:32,  2.25it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1845/3000:  62%|██████▏   | 1845/3000 [16:48<08:32,  2.25it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1846/3000:  62%|██████▏   | 1845/3000 [16:48<08:32,  2.25it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1846/3000:  62%|██████▏   | 1846/3000 [16:48<08:35,  2.24it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1846/3000:  62%|██████▏   | 1846/3000 [16:48<08:35,  2.24it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.51e+6]Epoch 1847/3000:  62%|██████▏   | 1846/3000 [16:48<08:35,  2.24it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.51e+6]Epoch 1847/3000:  62%|██████▏   | 1847/3000 [16:48<08:14,  2.33it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.51e+6]Epoch 1847/3000:  62%|██████▏   | 1847/3000 [16:48<08:14,  2.33it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1848/3000:  62%|██████▏   | 1847/3000 [16:48<08:14,  2.33it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1848/3000:  62%|██████▏   | 1848/3000 [16:49<07:37,  2.52it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1848/3000:  62%|██████▏   | 1848/3000 [16:49<07:37,  2.52it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1849/3000:  62%|██████▏   | 1848/3000 
[16:49<07:37,  2.52it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1849/3000:  62%|██████▏   | 1849/3000 [16:49<07:54,  2.43it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1849/3000:  62%|██████▏   | 1849/3000 [16:49<07:54,  2.43it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6] Epoch 1850/3000:  62%|██████▏   | 1849/3000 [16:49<07:54,  2.43it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1850/3000:  62%|██████▏   | 1850/3000 [16:50<07:55,  2.42it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1850/3000:  62%|██████▏   | 1850/3000 [16:50<07:55,  2.42it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1851/3000:  62%|██████▏   | 1850/3000 [16:50<07:55,  2.42it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1851/3000:  62%|██████▏   | 1851/3000 [16:50<08:27,  2.26it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1851/3000:  62%|██████▏   | 1851/3000 [16:50<08:27,  2.26it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.51e+6]Epoch 1852/3000:  62%|██████▏   | 1851/3000 [16:50<08:27,  2.26it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.51e+6]Epoch 1852/3000:  62%|██████▏   | 1852/3000 [16:51<08:26,  2.27it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.51e+6]Epoch 1852/3000:  62%|██████▏   | 1852/3000 [16:51<08:26,  2.27it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1853/3000:  62%|██████▏   | 1852/3000 [16:51<08:26,  2.27it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1853/3000:  62%|██████▏   | 1853/3000 [16:51<08:41,  2.20it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1853/3000:  62%|██████▏   | 1853/3000 [16:51<08:41,  2.20it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1854/3000:  62%|██████▏   | 1853/3000 [16:51<08:41,  2.20it/s, v_num=1, train_loss_step=1.49e+6, 
train_loss_epoch=1.51e+6]Epoch 1854/3000:  62%|██████▏   | 1854/3000 [16:51<08:33,  2.23it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1854/3000:  62%|██████▏   | 1854/3000 [16:51<08:33,  2.23it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1855/3000:  62%|██████▏   | 1854/3000 [16:51<08:33,  2.23it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1855/3000:  62%|██████▏   | 1855/3000 [16:52<08:32,  2.23it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1855/3000:  62%|██████▏   | 1855/3000 [16:52<08:32,  2.23it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1856/3000:  62%|██████▏   | 1855/3000 [16:52<08:32,  2.23it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1856/3000:  62%|██████▏   | 1856/3000 [16:52<08:18,  2.29it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1856/3000:  62%|██████▏   | 1856/3000 [16:52<08:18,  2.29it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1857/3000:  62%|██████▏   | 1856/3000 [16:52<08:18,  2.29it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1857/3000:  62%|██████▏   | 1857/3000 [16:53<08:04,  2.36it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1857/3000:  62%|██████▏   | 1857/3000 [16:53<08:04,  2.36it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6] Epoch 1858/3000:  62%|██████▏   | 1857/3000 [16:53<08:04,  2.36it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1858/3000:  62%|██████▏   | 1858/3000 [16:53<08:30,  2.24it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1858/3000:  62%|██████▏   | 1858/3000 [16:53<08:30,  2.24it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1859/3000:  62%|██████▏   | 1858/3000 [16:53<08:30,  2.24it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1859/3000:  62%|██████▏   | 1859/3000 
[16:53<07:47,  2.44it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1859/3000:  62%|██████▏   | 1859/3000 [16:53<07:47,  2.44it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6] Epoch 1860/3000:  62%|██████▏   | 1859/3000 [16:54<07:47,  2.44it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1860/3000:  62%|██████▏   | 1860/3000 [16:54<07:28,  2.54it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1860/3000:  62%|██████▏   | 1860/3000 [16:54<07:28,  2.54it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1861/3000:  62%|██████▏   | 1860/3000 [16:54<07:28,  2.54it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1861/3000:  62%|██████▏   | 1861/3000 [16:54<08:09,  2.33it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1861/3000:  62%|██████▏   | 1861/3000 [16:54<08:09,  2.33it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6] Epoch 1862/3000:  62%|██████▏   | 1861/3000 [16:54<08:09,  2.33it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1862/3000:  62%|██████▏   | 1862/3000 [16:55<07:53,  2.40it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1862/3000:  62%|██████▏   | 1862/3000 [16:55<07:53,  2.40it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1863/3000:  62%|██████▏   | 1862/3000 [16:55<07:53,  2.40it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1863/3000:  62%|██████▏   | 1863/3000 [16:55<08:10,  2.32it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1863/3000:  62%|██████▏   | 1863/3000 [16:55<08:10,  2.32it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.51e+6]Epoch 1864/3000:  62%|██████▏   | 1863/3000 [16:55<08:10,  2.32it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.51e+6]Epoch 1864/3000:  62%|██████▏   | 1864/3000 [16:56<08:16,  2.29it/s, v_num=1, train_loss_step=1.52e+6, 
train_loss_epoch=1.51e+6]Epoch 1864/3000:  62%|██████▏   | 1864/3000 [16:56<08:16,  2.29it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6] Epoch 1865/3000:  62%|██████▏   | 1864/3000 [16:56<08:16,  2.29it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1865/3000:  62%|██████▏   | 1865/3000 [16:56<07:49,  2.42it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1865/3000:  62%|██████▏   | 1865/3000 [16:56<07:49,  2.42it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1866/3000:  62%|██████▏   | 1865/3000 [16:56<07:49,  2.42it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1866/3000:  62%|██████▏   | 1866/3000 [16:56<07:56,  2.38it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.51e+6]Epoch 1866/3000:  62%|██████▏   | 1866/3000 [16:56<07:56,  2.38it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1867/3000:  62%|██████▏   | 1866/3000 [16:56<07:56,  2.38it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1867/3000:  62%|██████▏   | 1867/3000 [16:57<08:06,  2.33it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1867/3000:  62%|██████▏   | 1867/3000 [16:57<08:06,  2.33it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1868/3000:  62%|██████▏   | 1867/3000 [16:57<08:06,  2.33it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1868/3000:  62%|██████▏   | 1868/3000 [16:57<07:54,  2.39it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1868/3000:  62%|██████▏   | 1868/3000 [16:57<07:54,  2.39it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1869/3000:  62%|██████▏   | 1868/3000 [16:57<07:54,  2.39it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1869/3000:  62%|██████▏   | 1869/3000 [16:58<07:25,  2.54it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1869/3000:  62%|██████▏   | 1869/3000 
[16:58<07:25,  2.54it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.51e+6]Epoch 1870/3000:  62%|██████▏   | 1869/3000 [16:58<07:25,  2.54it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.51e+6]Epoch 1870/3000:  62%|██████▏   | 1870/3000 [16:58<07:43,  2.44it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.51e+6]Epoch 1870/3000:  62%|██████▏   | 1870/3000 [16:58<07:43,  2.44it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.51e+6]Epoch 1871/3000:  62%|██████▏   | 1870/3000 [16:58<07:43,  2.44it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.51e+6]Epoch 1871/3000:  62%|██████▏   | 1871/3000 [16:59<07:44,  2.43it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.51e+6]Epoch 1871/3000:  62%|██████▏   | 1871/3000 [16:59<07:44,  2.43it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1872/3000:  62%|██████▏   | 1871/3000 [16:59<07:44,  2.43it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1872/3000:  62%|██████▏   | 1872/3000 [16:59<07:50,  2.40it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.51e+6]Epoch 1872/3000:  62%|██████▏   | 1872/3000 [16:59<07:50,  2.40it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6] Epoch 1873/3000:  62%|██████▏   | 1872/3000 [16:59<07:50,  2.40it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1873/3000:  62%|██████▏   | 1873/3000 [16:59<08:10,  2.30it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1873/3000:  62%|██████▏   | 1873/3000 [16:59<08:10,  2.30it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1874/3000:  62%|██████▏   | 1873/3000 [16:59<08:10,  2.30it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1874/3000:  62%|██████▏   | 1874/3000 [17:00<08:05,  2.32it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1874/3000:  62%|██████▏   | 1874/3000 [17:00<08:05,  2.32it/s, v_num=1, train_loss_step=1.5e+6, 
train_loss_epoch=1.51e+6]Epoch 1875/3000:  62%|██████▏   | 1874/3000 [17:00<08:05,  2.32it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1875/3000:  62%|██████▎   | 1875/3000 [17:00<07:47,  2.41it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.51e+6]Epoch 1875/3000:  62%|██████▎   | 1875/3000 [17:00<07:47,  2.41it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1876/3000:  62%|██████▎   | 1875/3000 [17:00<07:47,  2.41it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1876/3000:  63%|██████▎   | 1876/3000 [17:01<07:50,  2.39it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.51e+6]Epoch 1876/3000:  63%|██████▎   | 1876/3000 [17:01<07:50,  2.39it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1877/3000:  63%|██████▎   | 1876/3000 [17:01<07:50,  2.39it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1877/3000:  63%|██████▎   | 1877/3000 [17:01<07:22,  2.54it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.51e+6]Epoch 1877/3000:  63%|██████▎   | 1877/3000 [17:01<07:22,  2.54it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6] Epoch 1878/3000:  63%|██████▎   | 1877/3000 [17:01<07:22,  2.54it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1878/3000:  63%|██████▎   | 1878/3000 [17:01<07:26,  2.51it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1878/3000:  63%|██████▎   | 1878/3000 [17:01<07:26,  2.51it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.5e+6]Epoch 1879/3000:  63%|██████▎   | 1878/3000 [17:01<07:26,  2.51it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.5e+6]Epoch 1879/3000:  63%|██████▎   | 1879/3000 [17:02<07:37,  2.45it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.5e+6]Epoch 1879/3000:  63%|██████▎   | 1879/3000 [17:02<07:37,  2.45it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1880/3000:  63%|██████▎   | 1879/3000 [17:02<07:37,  
2.45it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1880/3000:  63%|██████▎   | 1880/3000 [17:02<07:26,  2.51it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1880/3000:  63%|██████▎   | 1880/3000 [17:02<07:26,  2.51it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.5e+6]Epoch 1881/3000:  63%|██████▎   | 1880/3000 [17:02<07:26,  2.51it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.5e+6]Epoch 1881/3000:  63%|██████▎   | 1881/3000 [17:03<07:25,  2.51it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.5e+6]Epoch 1881/3000:  63%|██████▎   | 1881/3000 [17:03<07:25,  2.51it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6] Epoch 1882/3000:  63%|██████▎   | 1881/3000 [17:03<07:25,  2.51it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1882/3000:  63%|██████▎   | 1882/3000 [17:03<07:16,  2.56it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1882/3000:  63%|██████▎   | 1882/3000 [17:03<07:16,  2.56it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1883/3000:  63%|██████▎   | 1882/3000 [17:03<07:16,  2.56it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1883/3000:  63%|██████▎   | 1883/3000 [17:03<07:08,  2.60it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1883/3000:  63%|██████▎   | 1883/3000 [17:03<07:08,  2.60it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1884/3000:  63%|██████▎   | 1883/3000 [17:03<07:08,  2.60it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1884/3000:  63%|██████▎   | 1884/3000 [17:04<07:19,  2.54it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1884/3000:  63%|██████▎   | 1884/3000 [17:04<07:19,  2.54it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1885/3000:  63%|██████▎   | 1884/3000 [17:04<07:19,  2.54it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1885/3000:  63%|██████▎   
| 1885/3000 [17:04<07:35,  2.45it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1885/3000:  63%|██████▎   | 1885/3000 [17:04<07:35,  2.45it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1886/3000:  63%|██████▎   | 1885/3000 [17:04<07:35,  2.45it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1886/3000:  63%|██████▎   | 1886/3000 [17:05<07:56,  2.34it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1886/3000:  63%|██████▎   | 1886/3000 [17:05<07:56,  2.34it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1887/3000:  63%|██████▎   | 1886/3000 [17:05<07:56,  2.34it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1887/3000:  63%|██████▎   | 1887/3000 [17:05<07:40,  2.42it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1887/3000:  63%|██████▎   | 1887/3000 [17:05<07:40,  2.42it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1888/3000:  63%|██████▎   | 1887/3000 [17:05<07:40,  2.42it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1888/3000:  63%|██████▎   | 1888/3000 [17:05<07:12,  2.57it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1888/3000:  63%|██████▎   | 1888/3000 [17:05<07:12,  2.57it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1889/3000:  63%|██████▎   | 1888/3000 [17:05<07:12,  2.57it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1889/3000:  63%|██████▎   | 1889/3000 [17:06<07:31,  2.46it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1889/3000:  63%|██████▎   | 1889/3000 [17:06<07:31,  2.46it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1890/3000:  63%|██████▎   | 1889/3000 [17:06<07:31,  2.46it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1890/3000:  63%|██████▎   | 1890/3000 [17:06<07:45,  2.38it/s, v_num=1, train_loss_step=1.51e+6, 
train_loss_epoch=1.5e+6]Epoch 1890/3000:  63%|██████▎   | 1890/3000 [17:06<07:45,  2.38it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.5e+6]Epoch 1891/3000:  63%|██████▎   | 1890/3000 [17:06<07:45,  2.38it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.5e+6]Epoch 1891/3000:  63%|██████▎   | 1891/3000 [17:07<07:59,  2.31it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.5e+6]Epoch 1891/3000:  63%|██████▎   | 1891/3000 [17:07<07:59,  2.31it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6] Epoch 1892/3000:  63%|██████▎   | 1891/3000 [17:07<07:59,  2.31it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1892/3000:  63%|██████▎   | 1892/3000 [17:07<07:57,  2.32it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1892/3000:  63%|██████▎   | 1892/3000 [17:07<07:57,  2.32it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1893/3000:  63%|██████▎   | 1892/3000 [17:07<07:57,  2.32it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1893/3000:  63%|██████▎   | 1893/3000 [17:08<07:55,  2.33it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1893/3000:  63%|██████▎   | 1893/3000 [17:08<07:55,  2.33it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1894/3000:  63%|██████▎   | 1893/3000 [17:08<07:55,  2.33it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1894/3000:  63%|██████▎   | 1894/3000 [17:08<08:08,  2.26it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1894/3000:  63%|██████▎   | 1894/3000 [17:08<08:08,  2.26it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1895/3000:  63%|██████▎   | 1894/3000 [17:08<08:08,  2.26it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1895/3000:  63%|██████▎   | 1895/3000 [17:09<08:22,  2.20it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1895/3000:  63%|██████▎   | 1895/3000 [17:09<08:22,  2.20it/s, 
v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1896/3000:  63%|██████▎   | 1895/3000 [17:09<08:22,  2.20it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1896/3000:  63%|██████▎   | 1896/3000 [17:09<08:07,  2.27it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1896/3000:  63%|██████▎   | 1896/3000 [17:09<08:07,  2.27it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.5e+6]Epoch 1897/3000:  63%|██████▎   | 1896/3000 [17:09<08:07,  2.27it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.5e+6]Epoch 1897/3000:  63%|██████▎   | 1897/3000 [17:09<07:58,  2.31it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.5e+6]Epoch 1897/3000:  63%|██████▎   | 1897/3000 [17:09<07:58,  2.31it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.5e+6]Epoch 1898/3000:  63%|██████▎   | 1897/3000 [17:09<07:58,  2.31it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.5e+6]Epoch 1898/3000:  63%|██████▎   | 1898/3000 [17:10<08:00,  2.29it/s, v_num=1, train_loss_step=1.56e+6, train_loss_epoch=1.5e+6]Epoch 1898/3000:  63%|██████▎   | 1898/3000 [17:10<08:00,  2.29it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6] Epoch 1899/3000:  63%|██████▎   | 1898/3000 [17:10<08:00,  2.29it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1899/3000:  63%|██████▎   | 1899/3000 [17:10<08:04,  2.27it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1899/3000:  63%|██████▎   | 1899/3000 [17:10<08:04,  2.27it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1900/3000:  63%|██████▎   | 1899/3000 [17:10<08:04,  2.27it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1900/3000:  63%|██████▎   | 1900/3000 [17:11<08:07,  2.26it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1900/3000:  63%|██████▎   | 1900/3000 [17:11<08:07,  2.26it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1901/3000:  63%|██████▎   | 
1900/3000 [17:11<08:07,  2.26it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1901/3000:  63%|██████▎   | 1901/3000 [17:11<07:58,  2.30it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1901/3000:  63%|██████▎   | 1901/3000 [17:11<07:58,  2.30it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1902/3000:  63%|██████▎   | 1901/3000 [17:11<07:58,  2.30it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1902/3000:  63%|██████▎   | 1902/3000 [17:12<08:05,  2.26it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1902/3000:  63%|██████▎   | 1902/3000 [17:12<08:05,  2.26it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.5e+6]Epoch 1903/3000:  63%|██████▎   | 1902/3000 [17:12<08:05,  2.26it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.5e+6]Epoch 1903/3000:  63%|██████▎   | 1903/3000 [17:12<07:49,  2.34it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.5e+6]Epoch 1903/3000:  63%|██████▎   | 1903/3000 [17:12<07:49,  2.34it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.5e+6]Epoch 1904/3000:  63%|██████▎   | 1903/3000 [17:12<07:49,  2.34it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.5e+6]Epoch 1904/3000:  63%|██████▎   | 1904/3000 [17:12<08:05,  2.26it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.5e+6]Epoch 1904/3000:  63%|██████▎   | 1904/3000 [17:12<08:05,  2.26it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1905/3000:  63%|██████▎   | 1904/3000 [17:12<08:05,  2.26it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1905/3000:  64%|██████▎   | 1905/3000 [17:13<08:02,  2.27it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1905/3000:  64%|██████▎   | 1905/3000 [17:13<08:02,  2.27it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.5e+6]Epoch 1906/3000:  64%|██████▎   | 1905/3000 [17:13<08:02,  2.27it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.5e+6]Epoch 
1906/3000:  64%|██████▎   | 1906/3000 [17:13<07:47,  2.34it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.5e+6]Epoch 1906/3000:  64%|██████▎   | 1906/3000 [17:13<07:47,  2.34it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6] Epoch 1907/3000:  64%|██████▎   | 1906/3000 [17:13<07:47,  2.34it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1907/3000:  64%|██████▎   | 1907/3000 [17:14<08:01,  2.27it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1907/3000:  64%|██████▎   | 1907/3000 [17:14<08:01,  2.27it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1908/3000:  64%|██████▎   | 1907/3000 [17:14<08:01,  2.27it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1908/3000:  64%|██████▎   | 1908/3000 [17:14<07:59,  2.28it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1908/3000:  64%|██████▎   | 1908/3000 [17:14<07:59,  2.28it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1909/3000:  64%|██████▎   | 1908/3000 [17:14<07:59,  2.28it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1909/3000:  64%|██████▎   | 1909/3000 [17:15<08:00,  2.27it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1909/3000:  64%|██████▎   | 1909/3000 [17:15<08:00,  2.27it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.5e+6]Epoch 1910/3000:  64%|██████▎   | 1909/3000 [17:15<08:00,  2.27it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.5e+6]Epoch 1910/3000:  64%|██████▎   | 1910/3000 [17:15<07:49,  2.32it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.5e+6]Epoch 1910/3000:  64%|██████▎   | 1910/3000 [17:15<07:49,  2.32it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.5e+6]Epoch 1911/3000:  64%|██████▎   | 1910/3000 [17:15<07:49,  2.32it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.5e+6]Epoch 1911/3000:  64%|██████▎   | 1911/3000 [17:15<07:45,  2.34it/s, v_num=1, train_loss_step=1.52e+6, 
train_loss_epoch=1.5e+6]Epoch 1911/3000:  64%|██████▎   | 1911/3000 [17:15<07:45,  2.34it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1912/3000:  64%|██████▎   | 1911/3000 [17:15<07:45,  2.34it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1912/3000:  64%|██████▎   | 1912/3000 [17:16<07:45,  2.34it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1912/3000:  64%|██████▎   | 1912/3000 [17:16<07:45,  2.34it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.5e+6]Epoch 1913/3000:  64%|██████▎   | 1912/3000 [17:16<07:45,  2.34it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.5e+6]Epoch 1913/3000:  64%|██████▍   | 1913/3000 [17:16<07:52,  2.30it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.5e+6]Epoch 1913/3000:  64%|██████▍   | 1913/3000 [17:16<07:52,  2.30it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.5e+6]Epoch 1914/3000:  64%|██████▍   | 1913/3000 [17:16<07:52,  2.30it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.5e+6]Epoch 1914/3000:  64%|██████▍   | 1914/3000 [17:17<07:57,  2.27it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.5e+6]Epoch 1914/3000:  64%|██████▍   | 1914/3000 [17:17<07:57,  2.27it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1915/3000:  64%|██████▍   | 1914/3000 [17:17<07:57,  2.27it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1915/3000:  64%|██████▍   | 1915/3000 [17:17<07:34,  2.39it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1915/3000:  64%|██████▍   | 1915/3000 [17:17<07:34,  2.39it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1916/3000:  64%|██████▍   | 1915/3000 [17:17<07:34,  2.39it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1916/3000:  64%|██████▍   | 1916/3000 [17:18<07:39,  2.36it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1916/3000:  64%|██████▍   | 1916/3000 [17:18<07:39,  2.36it/s, 
v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1917/3000:  64%|██████▍   | 1916/3000 [17:18<07:39,  2.36it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1917/3000:  64%|██████▍   | 1917/3000 [17:18<07:23,  2.44it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1917/3000:  64%|██████▍   | 1917/3000 [17:18<07:23,  2.44it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.5e+6]Epoch 1918/3000:  64%|██████▍   | 1917/3000 [17:18<07:23,  2.44it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.5e+6]Epoch 1918/3000:  64%|██████▍   | 1918/3000 [17:18<07:16,  2.48it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.5e+6]Epoch 1918/3000:  64%|██████▍   | 1918/3000 [17:18<07:16,  2.48it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.5e+6]Epoch 1919/3000:  64%|██████▍   | 1918/3000 [17:18<07:16,  2.48it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.5e+6]Epoch 1919/3000:  64%|██████▍   | 1919/3000 [17:19<07:39,  2.36it/s, v_num=1, train_loss_step=1.55e+6, train_loss_epoch=1.5e+6]Epoch 1919/3000:  64%|██████▍   | 1919/3000 [17:19<07:39,  2.36it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6] Epoch 1920/3000:  64%|██████▍   | 1919/3000 [17:19<07:39,  2.36it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1920/3000:  64%|██████▍   | 1920/3000 [17:19<07:45,  2.32it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1920/3000:  64%|██████▍   | 1920/3000 [17:19<07:45,  2.32it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1921/3000:  64%|██████▍   | 1920/3000 [17:19<07:45,  2.32it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1921/3000:  64%|██████▍   | 1921/3000 [17:20<07:39,  2.35it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1921/3000:  64%|██████▍   | 1921/3000 [17:20<07:39,  2.35it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.5e+6]Epoch 1922/3000:  64%|██████▍   | 
1921/3000 [17:20<07:39,  2.35it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.5e+6]Epoch 1922/3000:  64%|██████▍   | 1922/3000 [17:20<07:43,  2.32it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.5e+6]Epoch 1922/3000:  64%|██████▍   | 1922/3000 [17:20<07:43,  2.32it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.5e+6]Epoch 1923/3000:  64%|██████▍   | 1922/3000 [17:20<07:43,  2.32it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.5e+6]Epoch 1923/3000:  64%|██████▍   | 1923/3000 [17:21<08:12,  2.19it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.5e+6]Epoch 1923/3000:  64%|██████▍   | 1923/3000 [17:21<08:12,  2.19it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1924/3000:  64%|██████▍   | 1923/3000 [17:21<08:12,  2.19it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1924/3000:  64%|██████▍   | 1924/3000 [17:21<08:20,  2.15it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1924/3000:  64%|██████▍   | 1924/3000 [17:21<08:20,  2.15it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1925/3000:  64%|██████▍   | 1924/3000 [17:21<08:20,  2.15it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1925/3000:  64%|██████▍   | 1925/3000 [17:22<08:13,  2.18it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1925/3000:  64%|██████▍   | 1925/3000 [17:22<08:13,  2.18it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.5e+6]Epoch 1926/3000:  64%|██████▍   | 1925/3000 [17:22<08:13,  2.18it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.5e+6]Epoch 1926/3000:  64%|██████▍   | 1926/3000 [17:22<08:01,  2.23it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.5e+6]Epoch 1926/3000:  64%|██████▍   | 1926/3000 [17:22<08:01,  2.23it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1927/3000:  64%|██████▍   | 1926/3000 [17:22<08:01,  2.23it/s, v_num=1, train_loss_step=1.48e+6, 
train_loss_epoch=1.5e+6]Epoch 1927/3000:  64%|██████▍   | 1927/3000 [17:22<08:00,  2.23it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1927/3000:  64%|██████▍   | 1927/3000 [17:22<08:00,  2.23it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1928/3000:  64%|██████▍   | 1927/3000 [17:22<08:00,  2.23it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1928/3000:  64%|██████▍   | 1928/3000 [17:23<07:49,  2.28it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1928/3000:  64%|██████▍   | 1928/3000 [17:23<07:49,  2.28it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.5e+6]Epoch 1929/3000:  64%|██████▍   | 1928/3000 [17:23<07:49,  2.28it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.5e+6]Epoch 1929/3000:  64%|██████▍   | 1929/3000 [17:23<07:28,  2.39it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.5e+6]Epoch 1929/3000:  64%|██████▍   | 1929/3000 [17:23<07:28,  2.39it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6] Epoch 1930/3000:  64%|██████▍   | 1929/3000 [17:23<07:28,  2.39it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1930/3000:  64%|██████▍   | 1930/3000 [17:24<07:31,  2.37it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1930/3000:  64%|██████▍   | 1930/3000 [17:24<07:31,  2.37it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.5e+6]Epoch 1931/3000:  64%|██████▍   | 1930/3000 [17:24<07:31,  2.37it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.5e+6]Epoch 1931/3000:  64%|██████▍   | 1931/3000 [17:24<07:28,  2.38it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.5e+6]Epoch 1931/3000:  64%|██████▍   | 1931/3000 [17:24<07:28,  2.38it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6] Epoch 1932/3000:  64%|██████▍   | 1931/3000 [17:24<07:28,  2.38it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1932/3000:  64%|██████▍   | 1932/3000 [17:25<07:25,  2.39it/s, 
v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1932/3000:  64%|██████▍   | 1932/3000 [17:25<07:25,  2.39it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1933/3000:  64%|██████▍   | 1932/3000 [17:25<07:25,  2.39it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1933/3000:  64%|██████▍   | 1933/3000 [17:25<07:31,  2.36it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1933/3000:  64%|██████▍   | 1933/3000 [17:25<07:31,  2.36it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6] Epoch 1934/3000:  64%|██████▍   | 1933/3000 [17:25<07:31,  2.36it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1934/3000:  64%|██████▍   | 1934/3000 [17:25<07:42,  2.30it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1934/3000:  64%|██████▍   | 1934/3000 [17:25<07:42,  2.30it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.5e+6]Epoch 1935/3000:  64%|██████▍   | 1934/3000 [17:25<07:42,  2.30it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.5e+6]Epoch 1935/3000:  64%|██████▍   | 1935/3000 [17:26<07:54,  2.24it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.5e+6]Epoch 1935/3000:  64%|██████▍   | 1935/3000 [17:26<07:54,  2.24it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1936/3000:  64%|██████▍   | 1935/3000 [17:26<07:54,  2.24it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1936/3000:  65%|██████▍   | 1936/3000 [17:26<07:53,  2.25it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1936/3000:  65%|██████▍   | 1936/3000 [17:26<07:53,  2.25it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.5e+6]Epoch 1937/3000:  65%|██████▍   | 1936/3000 [17:26<07:53,  2.25it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.5e+6]Epoch 1937/3000:  65%|██████▍   | 1937/3000 [17:27<07:46,  2.28it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.5e+6]Epoch 1937/3000:  65%|██████▍   | 
1937/3000 [17:27<07:46,  2.28it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1938/3000:  65%|██████▍   | 1937/3000 [17:27<07:46,  2.28it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1938/3000:  65%|██████▍   | 1938/3000 [17:27<07:45,  2.28it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1938/3000:  65%|██████▍   | 1938/3000 [17:27<07:45,  2.28it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6] Epoch 1939/3000:  65%|██████▍   | 1938/3000 [17:27<07:45,  2.28it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1939/3000:  65%|██████▍   | 1939/3000 [17:28<07:22,  2.40it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1939/3000:  65%|██████▍   | 1939/3000 [17:28<07:22,  2.40it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.5e+6]Epoch 1940/3000:  65%|██████▍   | 1939/3000 [17:28<07:22,  2.40it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.5e+6]Epoch 1940/3000:  65%|██████▍   | 1940/3000 [17:28<07:21,  2.40it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.5e+6]Epoch 1940/3000:  65%|██████▍   | 1940/3000 [17:28<07:21,  2.40it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1941/3000:  65%|██████▍   | 1940/3000 [17:28<07:21,  2.40it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1941/3000:  65%|██████▍   | 1941/3000 [17:28<07:03,  2.50it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1941/3000:  65%|██████▍   | 1941/3000 [17:28<07:03,  2.50it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1942/3000:  65%|██████▍   | 1941/3000 [17:28<07:03,  2.50it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1942/3000:  65%|██████▍   | 1942/3000 [17:29<07:00,  2.52it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.5e+6]Epoch 1942/3000:  65%|██████▍   | 1942/3000 [17:29<07:00,  2.52it/s, v_num=1, train_loss_step=1.49e+6, 
train_loss_epoch=1.5e+6]Epoch 1943/3000:  65%|██████▍   | 1942/3000 [17:29<07:00,  2.52it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1943/3000:  65%|██████▍   | 1943/3000 [17:29<06:49,  2.58it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1943/3000:  65%|██████▍   | 1943/3000 [17:29<06:49,  2.58it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1944/3000:  65%|██████▍   | 1943/3000 [17:29<06:49,  2.58it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1944/3000:  65%|██████▍   | 1944/3000 [17:30<07:07,  2.47it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1944/3000:  65%|██████▍   | 1944/3000 [17:30<07:07,  2.47it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.5e+6]Epoch 1945/3000:  65%|██████▍   | 1944/3000 [17:30<07:07,  2.47it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.5e+6]Epoch 1945/3000:  65%|██████▍   | 1945/3000 [17:30<07:14,  2.43it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.5e+6]Epoch 1945/3000:  65%|██████▍   | 1945/3000 [17:30<07:14,  2.43it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1946/3000:  65%|██████▍   | 1945/3000 [17:30<07:14,  2.43it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1946/3000:  65%|██████▍   | 1946/3000 [17:30<07:34,  2.32it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1946/3000:  65%|██████▍   | 1946/3000 [17:30<07:34,  2.32it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.5e+6]Epoch 1947/3000:  65%|██████▍   | 1946/3000 [17:30<07:34,  2.32it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.5e+6]Epoch 1947/3000:  65%|██████▍   | 1947/3000 [17:31<07:47,  2.25it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.5e+6]Epoch 1947/3000:  65%|██████▍   | 1947/3000 [17:31<07:47,  2.25it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.5e+6]Epoch 1948/3000:  65%|██████▍   | 1947/3000 [17:31<07:47,  2.25it/s, 
v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.5e+6]Epoch 1948/3000:  65%|██████▍   | 1948/3000 [17:31<07:48,  2.25it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.5e+6]Epoch 1948/3000:  65%|██████▍   | 1948/3000 [17:31<07:48,  2.25it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1949/3000:  65%|██████▍   | 1948/3000 [17:31<07:48,  2.25it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1949/3000:  65%|██████▍   | 1949/3000 [17:32<07:44,  2.26it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.5e+6]Epoch 1949/3000:  65%|██████▍   | 1949/3000 [17:32<07:44,  2.26it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6] Epoch 1950/3000:  65%|██████▍   | 1949/3000 [17:32<07:44,  2.26it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1950/3000:  65%|██████▌   | 1950/3000 [17:32<07:52,  2.22it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.5e+6]Epoch 1950/3000:  65%|██████▌   | 1950/3000 [17:32<07:52,  2.22it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1951/3000:  65%|██████▌   | 1950/3000 [17:32<07:52,  2.22it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1951/3000:  65%|██████▌   | 1951/3000 [17:33<07:39,  2.28it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.5e+6]Epoch 1951/3000:  65%|██████▌   | 1951/3000 [17:33<07:39,  2.28it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.49e+6]Epoch 1952/3000:  65%|██████▌   | 1951/3000 [17:33<07:39,  2.28it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.49e+6]Epoch 1952/3000:  65%|██████▌   | 1952/3000 [17:33<07:38,  2.29it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.49e+6]Epoch 1952/3000:  65%|██████▌   | 1952/3000 [17:33<07:38,  2.29it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1953/3000:  65%|██████▌   | 1952/3000 [17:33<07:38,  2.29it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1953/3000:  65%|██████▌   | 
1953/3000 [17:34<07:37,  2.29it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1953/3000:  65%|██████▌   | 1953/3000 [17:34<07:37,  2.29it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 1954/3000:  65%|██████▌   | 1953/3000 [17:34<07:37,  2.29it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 1954/3000:  65%|██████▌   | 1954/3000 [17:34<07:46,  2.24it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 1954/3000:  65%|██████▌   | 1954/3000 [17:34<07:46,  2.24it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1955/3000:  65%|██████▌   | 1954/3000 [17:34<07:46,  2.24it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1955/3000:  65%|██████▌   | 1955/3000 [17:34<07:34,  2.30it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1955/3000:  65%|██████▌   | 1955/3000 [17:34<07:34,  2.30it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1956/3000:  65%|██████▌   | 1955/3000 [17:34<07:34,  2.30it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1956/3000:  65%|██████▌   | 1956/3000 [17:35<07:39,  2.27it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1956/3000:  65%|██████▌   | 1956/3000 [17:35<07:39,  2.27it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 1957/3000:  65%|██████▌   | 1956/3000 [17:35<07:39,  2.27it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 1957/3000:  65%|██████▌   | 1957/3000 [17:35<07:40,  2.27it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 1957/3000:  65%|██████▌   | 1957/3000 [17:35<07:40,  2.27it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6] Epoch 1958/3000:  65%|██████▌   | 1957/3000 [17:35<07:40,  2.27it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 1958/3000:  65%|██████▌   | 1958/3000 [17:36<07:18,  2.38it/s, v_num=1, train_loss_step=1.5e+6, 
train_loss_epoch=1.49e+6]Epoch 1958/3000:  65%|██████▌   | 1958/3000 [17:36<07:18,  2.38it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1959/3000:  65%|██████▌   | 1958/3000 [17:36<07:18,  2.38it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1959/3000:  65%|██████▌   | 1959/3000 [17:36<07:21,  2.36it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1959/3000:  65%|██████▌   | 1959/3000 [17:36<07:21,  2.36it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6] Epoch 1960/3000:  65%|██████▌   | 1959/3000 [17:36<07:21,  2.36it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 1960/3000:  65%|██████▌   | 1960/3000 [17:37<07:35,  2.28it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 1960/3000:  65%|██████▌   | 1960/3000 [17:37<07:35,  2.28it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 1961/3000:  65%|██████▌   | 1960/3000 [17:37<07:35,  2.28it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 1961/3000:  65%|██████▌   | 1961/3000 [17:37<07:31,  2.30it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 1961/3000:  65%|██████▌   | 1961/3000 [17:37<07:31,  2.30it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 1962/3000:  65%|██████▌   | 1961/3000 [17:37<07:31,  2.30it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 1962/3000:  65%|██████▌   | 1962/3000 [17:37<07:30,  2.30it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 1962/3000:  65%|██████▌   | 1962/3000 [17:37<07:30,  2.30it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1963/3000:  65%|██████▌   | 1962/3000 [17:37<07:30,  2.30it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1963/3000:  65%|██████▌   | 1963/3000 [17:38<07:29,  2.31it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1963/3000:  65%|██████▌   | 1963/3000 [17:38<07:29,  
2.31it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1964/3000:  65%|██████▌   | 1963/3000 [17:38<07:29,  2.31it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1964/3000:  65%|██████▌   | 1964/3000 [17:38<07:37,  2.27it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1964/3000:  65%|██████▌   | 1964/3000 [17:38<07:37,  2.27it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 1965/3000:  65%|██████▌   | 1964/3000 [17:38<07:37,  2.27it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 1965/3000:  66%|██████▌   | 1965/3000 [17:39<07:20,  2.35it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 1965/3000:  66%|██████▌   | 1965/3000 [17:39<07:20,  2.35it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.49e+6]Epoch 1966/3000:  66%|██████▌   | 1965/3000 [17:39<07:20,  2.35it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.49e+6]Epoch 1966/3000:  66%|██████▌   | 1966/3000 [17:39<07:34,  2.28it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.49e+6]Epoch 1966/3000:  66%|██████▌   | 1966/3000 [17:39<07:34,  2.28it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1967/3000:  66%|██████▌   | 1966/3000 [17:39<07:34,  2.28it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1967/3000:  66%|██████▌   | 1967/3000 [17:40<07:25,  2.32it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1967/3000:  66%|██████▌   | 1967/3000 [17:40<07:25,  2.32it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 1968/3000:  66%|██████▌   | 1967/3000 [17:40<07:25,  2.32it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 1968/3000:  66%|██████▌   | 1968/3000 [17:40<07:16,  2.36it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 1968/3000:  66%|██████▌   | 1968/3000 [17:40<07:16,  2.36it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 
1969/3000:  66%|██████▌   | 1968/3000 [17:40<07:16,  2.36it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1969/3000:  66%|██████▌   | 1969/3000 [17:40<07:23,  2.33it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1969/3000:  66%|██████▌   | 1969/3000 [17:40<07:23,  2.33it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1970/3000:  66%|██████▌   | 1969/3000 [17:40<07:23,  2.33it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1970/3000:  66%|██████▌   | 1970/3000 [17:41<07:14,  2.37it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1970/3000:  66%|██████▌   | 1970/3000 [17:41<07:14,  2.37it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1971/3000:  66%|██████▌   | 1970/3000 [17:41<07:14,  2.37it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1971/3000:  66%|██████▌   | 1971/3000 [17:41<07:46,  2.21it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1971/3000:  66%|██████▌   | 1971/3000 [17:41<07:46,  2.21it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.49e+6]Epoch 1972/3000:  66%|██████▌   | 1971/3000 [17:41<07:46,  2.21it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.49e+6]Epoch 1972/3000:  66%|██████▌   | 1972/3000 [17:42<08:00,  2.14it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.49e+6]Epoch 1972/3000:  66%|██████▌   | 1972/3000 [17:42<08:00,  2.14it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1973/3000:  66%|██████▌   | 1972/3000 [17:42<08:00,  2.14it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1973/3000:  66%|██████▌   | 1973/3000 [17:42<07:40,  2.23it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1973/3000:  66%|██████▌   | 1973/3000 [17:42<07:40,  2.23it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6] Epoch 1974/3000:  66%|██████▌   | 1973/3000 [17:42<07:40,  2.23it/s, v_num=1, 
train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 1974/3000:  66%|██████▌   | 1974/3000 [17:43<07:36,  2.25it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 1974/3000:  66%|██████▌   | 1974/3000 [17:43<07:36,  2.25it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1975/3000:  66%|██████▌   | 1974/3000 [17:43<07:36,  2.25it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1975/3000:  66%|██████▌   | 1975/3000 [17:43<07:29,  2.28it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1975/3000:  66%|██████▌   | 1975/3000 [17:43<07:29,  2.28it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1976/3000:  66%|██████▌   | 1975/3000 [17:43<07:29,  2.28it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1976/3000:  66%|██████▌   | 1976/3000 [17:44<07:14,  2.36it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1976/3000:  66%|██████▌   | 1976/3000 [17:44<07:14,  2.36it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 1977/3000:  66%|██████▌   | 1976/3000 [17:44<07:14,  2.36it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 1977/3000:  66%|██████▌   | 1977/3000 [17:44<07:25,  2.30it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 1977/3000:  66%|██████▌   | 1977/3000 [17:44<07:25,  2.30it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1978/3000:  66%|██████▌   | 1977/3000 [17:44<07:25,  2.30it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1978/3000:  66%|██████▌   | 1978/3000 [17:44<07:38,  2.23it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1978/3000:  66%|██████▌   | 1978/3000 [17:44<07:38,  2.23it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1979/3000:  66%|██████▌   | 1978/3000 [17:44<07:38,  2.23it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1979/3000:  66%|██████▌   
| 1979/3000 [17:45<07:07,  2.39it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1979/3000:  66%|██████▌   | 1979/3000 [17:45<07:07,  2.39it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6] Epoch 1980/3000:  66%|██████▌   | 1979/3000 [17:45<07:07,  2.39it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 1980/3000:  66%|██████▌   | 1980/3000 [17:45<07:21,  2.31it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 1980/3000:  66%|██████▌   | 1980/3000 [17:45<07:21,  2.31it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 1981/3000:  66%|██████▌   | 1980/3000 [17:45<07:21,  2.31it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 1981/3000:  66%|██████▌   | 1981/3000 [17:46<07:12,  2.36it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 1981/3000:  66%|██████▌   | 1981/3000 [17:46<07:12,  2.36it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 1982/3000:  66%|██████▌   | 1981/3000 [17:46<07:12,  2.36it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 1982/3000:  66%|██████▌   | 1982/3000 [17:46<07:17,  2.33it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 1982/3000:  66%|██████▌   | 1982/3000 [17:46<07:17,  2.33it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 1983/3000:  66%|██████▌   | 1982/3000 [17:46<07:17,  2.33it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 1983/3000:  66%|██████▌   | 1983/3000 [17:47<07:47,  2.17it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 1983/3000:  66%|██████▌   | 1983/3000 [17:47<07:47,  2.17it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 1984/3000:  66%|██████▌   | 1983/3000 [17:47<07:47,  2.17it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 1984/3000:  66%|██████▌   | 1984/3000 [17:47<07:41,  2.20it/s, v_num=1, train_loss_step=1.47e+6, 
train_loss_epoch=1.49e+6]Epoch 1984/3000:  66%|██████▌   | 1984/3000 [17:47<07:41,  2.20it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6] Epoch 1985/3000:  66%|██████▌   | 1984/3000 [17:47<07:41,  2.20it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 1985/3000:  66%|██████▌   | 1985/3000 [17:47<06:58,  2.43it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 1985/3000:  66%|██████▌   | 1985/3000 [17:47<06:58,  2.43it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.49e+6]Epoch 1986/3000:  66%|██████▌   | 1985/3000 [17:47<06:58,  2.43it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.49e+6]Epoch 1986/3000:  66%|██████▌   | 1986/3000 [17:48<06:38,  2.55it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.49e+6]Epoch 1986/3000:  66%|██████▌   | 1986/3000 [17:48<06:38,  2.55it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.49e+6]Epoch 1987/3000:  66%|██████▌   | 1986/3000 [17:48<06:38,  2.55it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.49e+6]Epoch 1987/3000:  66%|██████▌   | 1987/3000 [17:48<07:10,  2.35it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.49e+6]Epoch 1987/3000:  66%|██████▌   | 1987/3000 [17:48<07:10,  2.35it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.49e+6]Epoch 1988/3000:  66%|██████▌   | 1987/3000 [17:48<07:10,  2.35it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.49e+6]Epoch 1988/3000:  66%|██████▋   | 1988/3000 [17:49<07:24,  2.28it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.49e+6]Epoch 1988/3000:  66%|██████▋   | 1988/3000 [17:49<07:24,  2.28it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 1989/3000:  66%|██████▋   | 1988/3000 [17:49<07:24,  2.28it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 1989/3000:  66%|██████▋   | 1989/3000 [17:49<07:12,  2.34it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 1989/3000:  66%|██████▋   | 1989/3000 
[17:49<07:12,  2.34it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1990/3000:  66%|██████▋   | 1989/3000 [17:49<07:12,  2.34it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1990/3000:  66%|██████▋   | 1990/3000 [17:50<07:17,  2.31it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1990/3000:  66%|██████▋   | 1990/3000 [17:50<07:17,  2.31it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1991/3000:  66%|██████▋   | 1990/3000 [17:50<07:17,  2.31it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1991/3000:  66%|██████▋   | 1991/3000 [17:50<07:31,  2.23it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1991/3000:  66%|██████▋   | 1991/3000 [17:50<07:31,  2.23it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 1992/3000:  66%|██████▋   | 1991/3000 [17:50<07:31,  2.23it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 1992/3000:  66%|██████▋   | 1992/3000 [17:51<07:39,  2.19it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 1992/3000:  66%|██████▋   | 1992/3000 [17:51<07:39,  2.19it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1993/3000:  66%|██████▋   | 1992/3000 [17:51<07:39,  2.19it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1993/3000:  66%|██████▋   | 1993/3000 [17:51<07:42,  2.18it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1993/3000:  66%|██████▋   | 1993/3000 [17:51<07:42,  2.18it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6] Epoch 1994/3000:  66%|██████▋   | 1993/3000 [17:51<07:42,  2.18it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 1994/3000:  66%|██████▋   | 1994/3000 [17:51<07:22,  2.27it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 1994/3000:  66%|██████▋   | 1994/3000 [17:51<07:22,  2.27it/s, v_num=1, train_loss_step=1.54e+6, 
train_loss_epoch=1.49e+6]Epoch 1995/3000:  66%|██████▋   | 1994/3000 [17:51<07:22,  2.27it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.49e+6]Epoch 1995/3000:  66%|██████▋   | 1995/3000 [17:52<06:57,  2.41it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.49e+6]Epoch 1995/3000:  66%|██████▋   | 1995/3000 [17:52<06:57,  2.41it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1996/3000:  66%|██████▋   | 1995/3000 [17:52<06:57,  2.41it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1996/3000:  67%|██████▋   | 1996/3000 [17:52<07:12,  2.32it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1996/3000:  67%|██████▋   | 1996/3000 [17:52<07:12,  2.32it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 1997/3000:  67%|██████▋   | 1996/3000 [17:52<07:12,  2.32it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 1997/3000:  67%|██████▋   | 1997/3000 [17:53<07:01,  2.38it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 1997/3000:  67%|██████▋   | 1997/3000 [17:53<07:01,  2.38it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1998/3000:  67%|██████▋   | 1997/3000 [17:53<07:01,  2.38it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1998/3000:  67%|██████▋   | 1998/3000 [17:53<07:03,  2.37it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 1998/3000:  67%|██████▋   | 1998/3000 [17:53<07:03,  2.37it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1999/3000:  67%|██████▋   | 1998/3000 [17:53<07:03,  2.37it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1999/3000:  67%|██████▋   | 1999/3000 [17:54<07:12,  2.31it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 1999/3000:  67%|██████▋   | 1999/3000 [17:54<07:12,  2.31it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2000/3000:  67%|██████▋   | 1999/3000 
[17:54<07:12,  2.31it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2000/3000:  67%|██████▋   | 2000/3000 [17:54<07:18,  2.28it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2000/3000:  67%|██████▋   | 2000/3000 [17:54<07:18,  2.28it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 2001/3000:  67%|██████▋   | 2000/3000 [17:54<07:18,  2.28it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 2001/3000:  67%|██████▋   | 2001/3000 [17:54<07:19,  2.27it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 2001/3000:  67%|██████▋   | 2001/3000 [17:54<07:19,  2.27it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6] Epoch 2002/3000:  67%|██████▋   | 2001/3000 [17:54<07:19,  2.27it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 2002/3000:  67%|██████▋   | 2002/3000 [17:55<07:03,  2.36it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 2002/3000:  67%|██████▋   | 2002/3000 [17:55<07:03,  2.36it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 2003/3000:  67%|██████▋   | 2002/3000 [17:55<07:03,  2.36it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 2003/3000:  67%|██████▋   | 2003/3000 [17:55<07:04,  2.35it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 2003/3000:  67%|██████▋   | 2003/3000 [17:55<07:04,  2.35it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 2004/3000:  67%|██████▋   | 2003/3000 [17:55<07:04,  2.35it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 2004/3000:  67%|██████▋   | 2004/3000 [17:56<07:12,  2.30it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 2004/3000:  67%|██████▋   | 2004/3000 [17:56<07:12,  2.30it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 2005/3000:  67%|██████▋   | 2004/3000 [17:56<07:12,  2.30it/s, v_num=1, train_loss_step=1.47e+6, 
train_loss_epoch=1.49e+6]Epoch 2005/3000:  67%|██████▋   | 2005/3000 [17:56<07:28,  2.22it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 2005/3000:  67%|██████▋   | 2005/3000 [17:56<07:28,  2.22it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2006/3000:  67%|██████▋   | 2005/3000 [17:56<07:28,  2.22it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2006/3000:  67%|██████▋   | 2006/3000 [17:57<07:15,  2.28it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2006/3000:  67%|██████▋   | 2006/3000 [17:57<07:15,  2.28it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 2007/3000:  67%|██████▋   | 2006/3000 [17:57<07:15,  2.28it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 2007/3000:  67%|██████▋   | 2007/3000 [17:57<07:00,  2.36it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 2007/3000:  67%|██████▋   | 2007/3000 [17:57<07:00,  2.36it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2008/3000:  67%|██████▋   | 2007/3000 [17:57<07:00,  2.36it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2008/3000:  67%|██████▋   | 2008/3000 [17:57<07:17,  2.27it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2008/3000:  67%|██████▋   | 2008/3000 [17:57<07:17,  2.27it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 2009/3000:  67%|██████▋   | 2008/3000 [17:57<07:17,  2.27it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 2009/3000:  67%|██████▋   | 2009/3000 [17:58<07:03,  2.34it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 2009/3000:  67%|██████▋   | 2009/3000 [17:58<07:03,  2.34it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.49e+6]Epoch 2010/3000:  67%|██████▋   | 2009/3000 [17:58<07:03,  2.34it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.49e+6]Epoch 2010/3000:  67%|██████▋   | 2010/3000 
[17:58<07:10,  2.30it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.49e+6]Epoch 2010/3000:  67%|██████▋   | 2010/3000 [17:58<07:10,  2.30it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 2011/3000:  67%|██████▋   | 2010/3000 [17:58<07:10,  2.30it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 2011/3000:  67%|██████▋   | 2011/3000 [17:59<07:01,  2.35it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 2011/3000:  67%|██████▋   | 2011/3000 [17:59<07:01,  2.35it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6] Epoch 2012/3000:  67%|██████▋   | 2011/3000 [17:59<07:01,  2.35it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 2012/3000:  67%|██████▋   | 2012/3000 [17:59<07:01,  2.35it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 2012/3000:  67%|██████▋   | 2012/3000 [17:59<07:01,  2.35it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 2013/3000:  67%|██████▋   | 2012/3000 [17:59<07:01,  2.35it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 2013/3000:  67%|██████▋   | 2013/3000 [18:00<06:53,  2.39it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.49e+6]Epoch 2013/3000:  67%|██████▋   | 2013/3000 [18:00<06:53,  2.39it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2014/3000:  67%|██████▋   | 2013/3000 [18:00<06:53,  2.39it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2014/3000:  67%|██████▋   | 2014/3000 [18:00<06:52,  2.39it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2014/3000:  67%|██████▋   | 2014/3000 [18:00<06:52,  2.39it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6] Epoch 2015/3000:  67%|██████▋   | 2014/3000 [18:00<06:52,  2.39it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 2015/3000:  67%|██████▋   | 2015/3000 [18:00<06:54,  2.38it/s, v_num=1, train_loss_step=1.5e+6, 
train_loss_epoch=1.49e+6]Epoch 2015/3000:  67%|██████▋   | 2015/3000 [18:00<06:54,  2.38it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2016/3000:  67%|██████▋   | 2015/3000 [18:00<06:54,  2.38it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2016/3000:  67%|██████▋   | 2016/3000 [18:01<06:57,  2.36it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2016/3000:  67%|██████▋   | 2016/3000 [18:01<06:57,  2.36it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6] Epoch 2017/3000:  67%|██████▋   | 2016/3000 [18:01<06:57,  2.36it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 2017/3000:  67%|██████▋   | 2017/3000 [18:01<07:04,  2.31it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 2017/3000:  67%|██████▋   | 2017/3000 [18:01<07:04,  2.31it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 2018/3000:  67%|██████▋   | 2017/3000 [18:01<07:04,  2.31it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 2018/3000:  67%|██████▋   | 2018/3000 [18:02<06:49,  2.40it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 2018/3000:  67%|██████▋   | 2018/3000 [18:02<06:49,  2.40it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6] Epoch 2019/3000:  67%|██████▋   | 2018/3000 [18:02<06:49,  2.40it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 2019/3000:  67%|██████▋   | 2019/3000 [18:02<06:46,  2.41it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 2019/3000:  67%|██████▋   | 2019/3000 [18:02<06:46,  2.41it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 2020/3000:  67%|██████▋   | 2019/3000 [18:02<06:46,  2.41it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 2020/3000:  67%|██████▋   | 2020/3000 [18:03<07:07,  2.29it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 2020/3000:  67%|██████▋   | 2020/3000 [18:03<07:07,  
2.29it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 2021/3000:  67%|██████▋   | 2020/3000 [18:03<07:07,  2.29it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 2021/3000:  67%|██████▋   | 2021/3000 [18:03<06:58,  2.34it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 2021/3000:  67%|██████▋   | 2021/3000 [18:03<06:58,  2.34it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 2022/3000:  67%|██████▋   | 2021/3000 [18:03<06:58,  2.34it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 2022/3000:  67%|██████▋   | 2022/3000 [18:03<06:54,  2.36it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.49e+6]Epoch 2022/3000:  67%|██████▋   | 2022/3000 [18:03<06:54,  2.36it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 2023/3000:  67%|██████▋   | 2022/3000 [18:03<06:54,  2.36it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 2023/3000:  67%|██████▋   | 2023/3000 [18:04<06:42,  2.43it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 2023/3000:  67%|██████▋   | 2023/3000 [18:04<06:42,  2.43it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6] Epoch 2024/3000:  67%|██████▋   | 2023/3000 [18:04<06:42,  2.43it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 2024/3000:  67%|██████▋   | 2024/3000 [18:04<06:49,  2.38it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 2024/3000:  67%|██████▋   | 2024/3000 [18:04<06:49,  2.38it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2025/3000:  67%|██████▋   | 2024/3000 [18:04<06:49,  2.38it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2025/3000:  68%|██████▊   | 2025/3000 [18:05<06:46,  2.40it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2025/3000:  68%|██████▊   | 2025/3000 [18:05<06:46,  2.40it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 
2026/3000:  68%|██████▊   | 2025/3000 [18:05<06:46,  2.40it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 2026/3000:  68%|██████▊   | 2026/3000 [18:05<06:22,  2.54it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.49e+6]Epoch 2026/3000:  68%|██████▊   | 2026/3000 [18:05<06:22,  2.54it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.49e+6]Epoch 2027/3000:  68%|██████▊   | 2026/3000 [18:05<06:22,  2.54it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.49e+6]Epoch 2027/3000:  68%|██████▊   | 2027/3000 [18:05<06:34,  2.46it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.49e+6]Epoch 2027/3000:  68%|██████▊   | 2027/3000 [18:05<06:34,  2.46it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 2028/3000:  68%|██████▊   | 2027/3000 [18:05<06:34,  2.46it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 2028/3000:  68%|██████▊   | 2028/3000 [18:06<06:29,  2.49it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 2028/3000:  68%|██████▊   | 2028/3000 [18:06<06:29,  2.49it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2029/3000:  68%|██████▊   | 2028/3000 [18:06<06:29,  2.49it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2029/3000:  68%|██████▊   | 2029/3000 [18:06<06:21,  2.55it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2029/3000:  68%|██████▊   | 2029/3000 [18:06<06:21,  2.55it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.49e+6]Epoch 2030/3000:  68%|██████▊   | 2029/3000 [18:06<06:21,  2.55it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.49e+6]Epoch 2030/3000:  68%|██████▊   | 2030/3000 [18:07<06:53,  2.35it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.49e+6]Epoch 2030/3000:  68%|██████▊   | 2030/3000 [18:07<06:53,  2.35it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6] Epoch 2031/3000:  68%|██████▊   | 2030/3000 [18:07<06:53,  2.35it/s, v_num=1, 
train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 2031/3000:  68%|██████▊   | 2031/3000 [18:07<06:49,  2.37it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.49e+6]Epoch 2031/3000:  68%|██████▊   | 2031/3000 [18:07<06:49,  2.37it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2032/3000:  68%|██████▊   | 2031/3000 [18:07<06:49,  2.37it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2032/3000:  68%|██████▊   | 2032/3000 [18:08<07:00,  2.30it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.49e+6]Epoch 2032/3000:  68%|██████▊   | 2032/3000 [18:08<07:00,  2.30it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 2033/3000:  68%|██████▊   | 2032/3000 [18:08<07:00,  2.30it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 2033/3000:  68%|██████▊   | 2033/3000 [18:08<07:18,  2.20it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.49e+6]Epoch 2033/3000:  68%|██████▊   | 2033/3000 [18:08<07:18,  2.20it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.49e+6]Epoch 2034/3000:  68%|██████▊   | 2033/3000 [18:08<07:18,  2.20it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.49e+6]Epoch 2034/3000:  68%|██████▊   | 2034/3000 [18:08<07:07,  2.26it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.49e+6]Epoch 2034/3000:  68%|██████▊   | 2034/3000 [18:08<07:07,  2.26it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.48e+6]Epoch 2035/3000:  68%|██████▊   | 2034/3000 [18:08<07:07,  2.26it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.48e+6]Epoch 2035/3000:  68%|██████▊   | 2035/3000 [18:09<07:16,  2.21it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.48e+6]Epoch 2035/3000:  68%|██████▊   | 2035/3000 [18:09<07:16,  2.21it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2036/3000:  68%|██████▊   | 2035/3000 [18:09<07:16,  2.21it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2036/3000:  68%|██████▊   
| 2036/3000 [18:09<06:50,  2.35it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2036/3000:  68%|██████▊   | 2036/3000 [18:09<06:50,  2.35it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.48e+6]Epoch 2037/3000:  68%|██████▊   | 2036/3000 [18:09<06:50,  2.35it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.48e+6]Epoch 2037/3000:  68%|██████▊   | 2037/3000 [18:10<06:32,  2.45it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.48e+6]Epoch 2037/3000:  68%|██████▊   | 2037/3000 [18:10<06:32,  2.45it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2038/3000:  68%|██████▊   | 2037/3000 [18:10<06:32,  2.45it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2038/3000:  68%|██████▊   | 2038/3000 [18:10<06:44,  2.38it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2038/3000:  68%|██████▊   | 2038/3000 [18:10<06:44,  2.38it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2039/3000:  68%|██████▊   | 2038/3000 [18:10<06:44,  2.38it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2039/3000:  68%|██████▊   | 2039/3000 [18:11<06:51,  2.33it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2039/3000:  68%|██████▊   | 2039/3000 [18:11<06:51,  2.33it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2040/3000:  68%|██████▊   | 2039/3000 [18:11<06:51,  2.33it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2040/3000:  68%|██████▊   | 2040/3000 [18:11<06:28,  2.47it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2040/3000:  68%|██████▊   | 2040/3000 [18:11<06:28,  2.47it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2041/3000:  68%|██████▊   | 2040/3000 [18:11<06:28,  2.47it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2041/3000:  68%|██████▊   | 2041/3000 [18:11<06:41,  2.39it/s, v_num=1, train_loss_step=1.48e+6, 
train_loss_epoch=1.48e+6]Epoch 2041/3000:  68%|██████▊   | 2041/3000 [18:11<06:41,  2.39it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.48e+6]Epoch 2042/3000:  68%|██████▊   | 2041/3000 [18:11<06:41,  2.39it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.48e+6]Epoch 2042/3000:  68%|██████▊   | 2042/3000 [18:12<06:51,  2.33it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.48e+6]Epoch 2042/3000:  68%|██████▊   | 2042/3000 [18:12<06:51,  2.33it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2043/3000:  68%|██████▊   | 2042/3000 [18:12<06:51,  2.33it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2043/3000:  68%|██████▊   | 2043/3000 [18:12<06:51,  2.33it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2043/3000:  68%|██████▊   | 2043/3000 [18:12<06:51,  2.33it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.48e+6]Epoch 2044/3000:  68%|██████▊   | 2043/3000 [18:12<06:51,  2.33it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.48e+6]Epoch 2044/3000:  68%|██████▊   | 2044/3000 [18:13<06:52,  2.32it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.48e+6]Epoch 2044/3000:  68%|██████▊   | 2044/3000 [18:13<06:52,  2.32it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2045/3000:  68%|██████▊   | 2044/3000 [18:13<06:52,  2.32it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2045/3000:  68%|██████▊   | 2045/3000 [18:13<07:01,  2.27it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2045/3000:  68%|██████▊   | 2045/3000 [18:13<07:01,  2.27it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2046/3000:  68%|██████▊   | 2045/3000 [18:13<07:01,  2.27it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2046/3000:  68%|██████▊   | 2046/3000 [18:14<06:58,  2.28it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2046/3000:  68%|██████▊   | 2046/3000 
[18:14<06:58,  2.28it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6] Epoch 2047/3000:  68%|██████▊   | 2046/3000 [18:14<06:58,  2.28it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2047/3000:  68%|██████▊   | 2047/3000 [18:14<06:55,  2.30it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2047/3000:  68%|██████▊   | 2047/3000 [18:14<06:55,  2.30it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2048/3000:  68%|██████▊   | 2047/3000 [18:14<06:55,  2.30it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2048/3000:  68%|██████▊   | 2048/3000 [18:14<06:58,  2.27it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2048/3000:  68%|██████▊   | 2048/3000 [18:14<06:58,  2.27it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.48e+6]Epoch 2049/3000:  68%|██████▊   | 2048/3000 [18:14<06:58,  2.27it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.48e+6]Epoch 2049/3000:  68%|██████▊   | 2049/3000 [18:15<07:02,  2.25it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.48e+6]Epoch 2049/3000:  68%|██████▊   | 2049/3000 [18:15<07:02,  2.25it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6] Epoch 2050/3000:  68%|██████▊   | 2049/3000 [18:15<07:02,  2.25it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2050/3000:  68%|██████▊   | 2050/3000 [18:15<06:55,  2.28it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2050/3000:  68%|██████▊   | 2050/3000 [18:15<06:55,  2.28it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2051/3000:  68%|██████▊   | 2050/3000 [18:15<06:55,  2.28it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2051/3000:  68%|██████▊   | 2051/3000 [18:16<07:09,  2.21it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2051/3000:  68%|██████▊   | 2051/3000 [18:16<07:09,  2.21it/s, v_num=1, train_loss_step=1.45e+6, 
train_loss_epoch=1.48e+6]Epoch 2052/3000:  68%|██████▊   | 2051/3000 [18:16<07:09,  2.21it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2052/3000:  68%|██████▊   | 2052/3000 [18:16<07:19,  2.15it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2052/3000:  68%|██████▊   | 2052/3000 [18:16<07:19,  2.15it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6] Epoch 2053/3000:  68%|██████▊   | 2052/3000 [18:16<07:19,  2.15it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2053/3000:  68%|██████▊   | 2053/3000 [18:17<07:28,  2.11it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2053/3000:  68%|██████▊   | 2053/3000 [18:17<07:28,  2.11it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2054/3000:  68%|██████▊   | 2053/3000 [18:17<07:28,  2.11it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2054/3000:  68%|██████▊   | 2054/3000 [18:17<07:32,  2.09it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2054/3000:  68%|██████▊   | 2054/3000 [18:17<07:32,  2.09it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2055/3000:  68%|██████▊   | 2054/3000 [18:17<07:32,  2.09it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2055/3000:  68%|██████▊   | 2055/3000 [18:18<07:20,  2.14it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2055/3000:  68%|██████▊   | 2055/3000 [18:18<07:20,  2.14it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2056/3000:  68%|██████▊   | 2055/3000 [18:18<07:20,  2.14it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2056/3000:  69%|██████▊   | 2056/3000 [18:18<07:22,  2.13it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2056/3000:  69%|██████▊   | 2056/3000 [18:18<07:22,  2.13it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2057/3000:  69%|██████▊   | 2056/3000 
[18:18<07:22,  2.13it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2057/3000:  69%|██████▊   | 2057/3000 [18:19<07:21,  2.14it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2057/3000:  69%|██████▊   | 2057/3000 [18:19<07:21,  2.14it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6] Epoch 2058/3000:  69%|██████▊   | 2057/3000 [18:19<07:21,  2.14it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2058/3000:  69%|██████▊   | 2058/3000 [18:19<06:46,  2.32it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2058/3000:  69%|██████▊   | 2058/3000 [18:19<06:46,  2.32it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2059/3000:  69%|██████▊   | 2058/3000 [18:19<06:46,  2.32it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2059/3000:  69%|██████▊   | 2059/3000 [18:19<06:38,  2.36it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2059/3000:  69%|██████▊   | 2059/3000 [18:19<06:38,  2.36it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.48e+6]Epoch 2060/3000:  69%|██████▊   | 2059/3000 [18:19<06:38,  2.36it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.48e+6]Epoch 2060/3000:  69%|██████▊   | 2060/3000 [18:20<06:39,  2.35it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.48e+6]Epoch 2060/3000:  69%|██████▊   | 2060/3000 [18:20<06:39,  2.35it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6] Epoch 2061/3000:  69%|██████▊   | 2060/3000 [18:20<06:39,  2.35it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2061/3000:  69%|██████▊   | 2061/3000 [18:20<06:58,  2.24it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2061/3000:  69%|██████▊   | 2061/3000 [18:20<06:58,  2.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.48e+6]Epoch 2062/3000:  69%|██████▊   | 2061/3000 [18:20<06:58,  2.24it/s, v_num=1, train_loss_step=1.44e+6, 
train_loss_epoch=1.48e+6]Epoch 2062/3000:  69%|██████▊   | 2062/3000 [18:21<06:59,  2.23it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.48e+6]Epoch 2062/3000:  69%|██████▊   | 2062/3000 [18:21<06:59,  2.23it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6] Epoch 2063/3000:  69%|██████▊   | 2062/3000 [18:21<06:59,  2.23it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2063/3000:  69%|██████▉   | 2063/3000 [18:21<07:00,  2.23it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2063/3000:  69%|██████▉   | 2063/3000 [18:21<07:00,  2.23it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2064/3000:  69%|██████▉   | 2063/3000 [18:21<07:00,  2.23it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2064/3000:  69%|██████▉   | 2064/3000 [18:22<07:02,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2064/3000:  69%|██████▉   | 2064/3000 [18:22<07:02,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2065/3000:  69%|██████▉   | 2064/3000 [18:22<07:02,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2065/3000:  69%|██████▉   | 2065/3000 [18:22<06:56,  2.25it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2065/3000:  69%|██████▉   | 2065/3000 [18:22<06:56,  2.25it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.48e+6]Epoch 2066/3000:  69%|██████▉   | 2065/3000 [18:22<06:56,  2.25it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.48e+6]Epoch 2066/3000:  69%|██████▉   | 2066/3000 [18:23<06:55,  2.25it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.48e+6]Epoch 2066/3000:  69%|██████▉   | 2066/3000 [18:23<06:55,  2.25it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2067/3000:  69%|██████▉   | 2066/3000 [18:23<06:55,  2.25it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2067/3000:  69%|██████▉   | 2067/3000 
[18:23<06:30,  2.39it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2067/3000:  69%|██████▉   | 2067/3000 [18:23<06:30,  2.39it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6] Epoch 2068/3000:  69%|██████▉   | 2067/3000 [18:23<06:30,  2.39it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2068/3000:  69%|██████▉   | 2068/3000 [18:23<05:57,  2.61it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2068/3000:  69%|██████▉   | 2068/3000 [18:23<05:57,  2.61it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2069/3000:  69%|██████▉   | 2068/3000 [18:23<05:57,  2.61it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2069/3000:  69%|██████▉   | 2069/3000 [18:24<06:01,  2.57it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2069/3000:  69%|██████▉   | 2069/3000 [18:24<06:01,  2.57it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2070/3000:  69%|██████▉   | 2069/3000 [18:24<06:01,  2.57it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2070/3000:  69%|██████▉   | 2070/3000 [18:24<06:12,  2.49it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2070/3000:  69%|██████▉   | 2070/3000 [18:24<06:12,  2.49it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2071/3000:  69%|██████▉   | 2070/3000 [18:24<06:12,  2.49it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2071/3000:  69%|██████▉   | 2071/3000 [18:24<06:29,  2.39it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2071/3000:  69%|██████▉   | 2071/3000 [18:24<06:29,  2.39it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6] Epoch 2072/3000:  69%|██████▉   | 2071/3000 [18:25<06:29,  2.39it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2072/3000:  69%|██████▉   | 2072/3000 [18:25<06:25,  2.41it/s, v_num=1, train_loss_step=1.5e+6, 
train_loss_epoch=1.48e+6]Epoch 2072/3000:  69%|██████▉   | 2072/3000 [18:25<06:25,  2.41it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2073/3000:  69%|██████▉   | 2072/3000 [18:25<06:25,  2.41it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2073/3000:  69%|██████▉   | 2073/3000 [18:25<06:33,  2.36it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2073/3000:  69%|██████▉   | 2073/3000 [18:25<06:33,  2.36it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.48e+6]Epoch 2074/3000:  69%|██████▉   | 2073/3000 [18:25<06:33,  2.36it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.48e+6]Epoch 2074/3000:  69%|██████▉   | 2074/3000 [18:26<06:28,  2.38it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.48e+6]Epoch 2074/3000:  69%|██████▉   | 2074/3000 [18:26<06:28,  2.38it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.48e+6]Epoch 2075/3000:  69%|██████▉   | 2074/3000 [18:26<06:28,  2.38it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.48e+6]Epoch 2075/3000:  69%|██████▉   | 2075/3000 [18:26<06:39,  2.32it/s, v_num=1, train_loss_step=1.54e+6, train_loss_epoch=1.48e+6]Epoch 2075/3000:  69%|██████▉   | 2075/3000 [18:26<06:39,  2.32it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.48e+6]Epoch 2076/3000:  69%|██████▉   | 2075/3000 [18:26<06:39,  2.32it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.48e+6]Epoch 2076/3000:  69%|██████▉   | 2076/3000 [18:27<06:59,  2.20it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.48e+6]Epoch 2076/3000:  69%|██████▉   | 2076/3000 [18:27<06:59,  2.20it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2077/3000:  69%|██████▉   | 2076/3000 [18:27<06:59,  2.20it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2077/3000:  69%|██████▉   | 2077/3000 [18:27<06:58,  2.20it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2077/3000:  69%|██████▉   | 2077/3000 
[18:27<06:58,  2.20it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2078/3000:  69%|██████▉   | 2077/3000 [18:27<06:58,  2.20it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2078/3000:  69%|██████▉   | 2078/3000 [18:28<07:07,  2.16it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2078/3000:  69%|██████▉   | 2078/3000 [18:28<07:07,  2.16it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2079/3000:  69%|██████▉   | 2078/3000 [18:28<07:07,  2.16it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2079/3000:  69%|██████▉   | 2079/3000 [18:28<07:11,  2.14it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2079/3000:  69%|██████▉   | 2079/3000 [18:28<07:11,  2.14it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.48e+6]Epoch 2080/3000:  69%|██████▉   | 2079/3000 [18:28<07:11,  2.14it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.48e+6]Epoch 2080/3000:  69%|██████▉   | 2080/3000 [18:29<06:45,  2.27it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.48e+6]Epoch 2080/3000:  69%|██████▉   | 2080/3000 [18:29<06:45,  2.27it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.48e+6]Epoch 2081/3000:  69%|██████▉   | 2080/3000 [18:29<06:45,  2.27it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.48e+6]Epoch 2081/3000:  69%|██████▉   | 2081/3000 [18:29<06:32,  2.34it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.48e+6]Epoch 2081/3000:  69%|██████▉   | 2081/3000 [18:29<06:32,  2.34it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2082/3000:  69%|██████▉   | 2081/3000 [18:29<06:32,  2.34it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2082/3000:  69%|██████▉   | 2082/3000 [18:29<06:40,  2.29it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2082/3000:  69%|██████▉   | 2082/3000 [18:29<06:40,  2.29it/s, v_num=1, train_loss_step=1.49e+6, 
train_loss_epoch=1.48e+6]Epoch 2083/3000:  69%|██████▉   | 2082/3000 [18:29<06:40,  2.29it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2083/3000:  69%|██████▉   | 2083/3000 [18:30<06:41,  2.29it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2083/3000:  69%|██████▉   | 2083/3000 [18:30<06:41,  2.29it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2084/3000:  69%|██████▉   | 2083/3000 [18:30<06:41,  2.29it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2084/3000:  69%|██████▉   | 2084/3000 [18:30<06:33,  2.32it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2084/3000:  69%|██████▉   | 2084/3000 [18:30<06:33,  2.32it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.48e+6]Epoch 2085/3000:  69%|██████▉   | 2084/3000 [18:30<06:33,  2.32it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.48e+6]Epoch 2085/3000:  70%|██████▉   | 2085/3000 [18:31<06:24,  2.38it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.48e+6]Epoch 2085/3000:  70%|██████▉   | 2085/3000 [18:31<06:24,  2.38it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2086/3000:  70%|██████▉   | 2085/3000 [18:31<06:24,  2.38it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2086/3000:  70%|██████▉   | 2086/3000 [18:31<06:12,  2.45it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2086/3000:  70%|██████▉   | 2086/3000 [18:31<06:12,  2.45it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2087/3000:  70%|██████▉   | 2086/3000 [18:31<06:12,  2.45it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2087/3000:  70%|██████▉   | 2087/3000 [18:31<06:17,  2.42it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2087/3000:  70%|██████▉   | 2087/3000 [18:31<06:17,  2.42it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.48e+6]Epoch 2088/3000:  70%|██████▉   | 2087/3000 
[18:31<06:17,  2.42it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.48e+6]Epoch 2088/3000:  70%|██████▉   | 2088/3000 [18:32<06:25,  2.37it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.48e+6]Epoch 2088/3000:  70%|██████▉   | 2088/3000 [18:32<06:25,  2.37it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2089/3000:  70%|██████▉   | 2088/3000 [18:32<06:25,  2.37it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2089/3000:  70%|██████▉   | 2089/3000 [18:32<06:22,  2.38it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2089/3000:  70%|██████▉   | 2089/3000 [18:32<06:22,  2.38it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2090/3000:  70%|██████▉   | 2089/3000 [18:32<06:22,  2.38it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2090/3000:  70%|██████▉   | 2090/3000 [18:33<06:27,  2.35it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2090/3000:  70%|██████▉   | 2090/3000 [18:33<06:27,  2.35it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2091/3000:  70%|██████▉   | 2090/3000 [18:33<06:27,  2.35it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2091/3000:  70%|██████▉   | 2091/3000 [18:33<06:14,  2.43it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2091/3000:  70%|██████▉   | 2091/3000 [18:33<06:14,  2.43it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.48e+6]Epoch 2092/3000:  70%|██████▉   | 2091/3000 [18:33<06:14,  2.43it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.48e+6]Epoch 2092/3000:  70%|██████▉   | 2092/3000 [18:34<06:19,  2.39it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.48e+6]Epoch 2092/3000:  70%|██████▉   | 2092/3000 [18:34<06:19,  2.39it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2093/3000:  70%|██████▉   | 2092/3000 [18:34<06:19,  2.39it/s, v_num=1, train_loss_step=1.49e+6, 
train_loss_epoch=1.48e+6]Epoch 2093/3000:  70%|██████▉   | 2093/3000 [18:34<06:36,  2.29it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2093/3000:  70%|██████▉   | 2093/3000 [18:34<06:36,  2.29it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2094/3000:  70%|██████▉   | 2093/3000 [18:34<06:36,  2.29it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2094/3000:  70%|██████▉   | 2094/3000 [18:34<06:38,  2.27it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2094/3000:  70%|██████▉   | 2094/3000 [18:34<06:38,  2.27it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.48e+6]Epoch 2095/3000:  70%|██████▉   | 2094/3000 [18:34<06:38,  2.27it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.48e+6]Epoch 2095/3000:  70%|██████▉   | 2095/3000 [18:35<06:47,  2.22it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.48e+6]Epoch 2095/3000:  70%|██████▉   | 2095/3000 [18:35<06:47,  2.22it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2096/3000:  70%|██████▉   | 2095/3000 [18:35<06:47,  2.22it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2096/3000:  70%|██████▉   | 2096/3000 [18:35<06:43,  2.24it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2096/3000:  70%|██████▉   | 2096/3000 [18:35<06:43,  2.24it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2097/3000:  70%|██████▉   | 2096/3000 [18:35<06:43,  2.24it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2097/3000:  70%|██████▉   | 2097/3000 [18:36<06:24,  2.35it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2097/3000:  70%|██████▉   | 2097/3000 [18:36<06:24,  2.35it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6] Epoch 2098/3000:  70%|██████▉   | 2097/3000 [18:36<06:24,  2.35it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2098/3000:  70%|██████▉   | 2098/3000 
[18:36<06:23,  2.35it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2098/3000:  70%|██████▉   | 2098/3000 [18:36<06:23,  2.35it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.48e+6]Epoch 2099/3000:  70%|██████▉   | 2098/3000 [18:36<06:23,  2.35it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.48e+6]Epoch 2099/3000:  70%|██████▉   | 2099/3000 [18:37<06:40,  2.25it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.48e+6]Epoch 2099/3000:  70%|██████▉   | 2099/3000 [18:37<06:40,  2.25it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2100/3000:  70%|██████▉   | 2099/3000 [18:37<06:40,  2.25it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2100/3000:  70%|███████   | 2100/3000 [18:37<06:53,  2.17it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2100/3000:  70%|███████   | 2100/3000 [18:37<06:53,  2.17it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.48e+6]Epoch 2101/3000:  70%|███████   | 2100/3000 [18:37<06:53,  2.17it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.48e+6]Epoch 2101/3000:  70%|███████   | 2101/3000 [18:38<06:38,  2.26it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.48e+6]Epoch 2101/3000:  70%|███████   | 2101/3000 [18:38<06:38,  2.26it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6] Epoch 2102/3000:  70%|███████   | 2101/3000 [18:38<06:38,  2.26it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2102/3000:  70%|███████   | 2102/3000 [18:38<06:57,  2.15it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2102/3000:  70%|███████   | 2102/3000 [18:38<06:57,  2.15it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2103/3000:  70%|███████   | 2102/3000 [18:38<06:57,  2.15it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2103/3000:  70%|███████   | 2103/3000 [18:38<06:32,  2.29it/s, v_num=1, train_loss_step=1.5e+6, 
train_loss_epoch=1.48e+6]Epoch 2103/3000:  70%|███████   | 2103/3000 [18:38<06:32,  2.29it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2104/3000:  70%|███████   | 2103/3000 [18:38<06:32,  2.29it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2104/3000:  70%|███████   | 2104/3000 [18:39<06:27,  2.31it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2104/3000:  70%|███████   | 2104/3000 [18:39<06:27,  2.31it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6] Epoch 2105/3000:  70%|███████   | 2104/3000 [18:39<06:27,  2.31it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2105/3000:  70%|███████   | 2105/3000 [18:39<06:46,  2.20it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2105/3000:  70%|███████   | 2105/3000 [18:39<06:46,  2.20it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2106/3000:  70%|███████   | 2105/3000 [18:39<06:46,  2.20it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2106/3000:  70%|███████   | 2106/3000 [18:40<06:36,  2.25it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2106/3000:  70%|███████   | 2106/3000 [18:40<06:36,  2.25it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2107/3000:  70%|███████   | 2106/3000 [18:40<06:36,  2.25it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2107/3000:  70%|███████   | 2107/3000 [18:40<06:29,  2.30it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2107/3000:  70%|███████   | 2107/3000 [18:40<06:29,  2.30it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2108/3000:  70%|███████   | 2107/3000 [18:40<06:29,  2.30it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2108/3000:  70%|███████   | 2108/3000 [18:41<06:41,  2.22it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2108/3000:  70%|███████   | 2108/3000 
[18:41<06:41,  2.22it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.48e+6]Epoch 2109/3000:  70%|███████   | 2108/3000 [18:41<06:41,  2.22it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.48e+6]Epoch 2109/3000:  70%|███████   | 2109/3000 [18:41<06:34,  2.26it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.48e+6]Epoch 2109/3000:  70%|███████   | 2109/3000 [18:41<06:34,  2.26it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2110/3000:  70%|███████   | 2109/3000 [18:41<06:34,  2.26it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2110/3000:  70%|███████   | 2110/3000 [18:42<06:32,  2.27it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2110/3000:  70%|███████   | 2110/3000 [18:42<06:32,  2.27it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2111/3000:  70%|███████   | 2110/3000 [18:42<06:32,  2.27it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2111/3000:  70%|███████   | 2111/3000 [18:42<06:52,  2.16it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2111/3000:  70%|███████   | 2111/3000 [18:42<06:52,  2.16it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2112/3000:  70%|███████   | 2111/3000 [18:42<06:52,  2.16it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2112/3000:  70%|███████   | 2112/3000 [18:43<06:39,  2.22it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2112/3000:  70%|███████   | 2112/3000 [18:43<06:39,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2113/3000:  70%|███████   | 2112/3000 [18:43<06:39,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2113/3000:  70%|███████   | 2113/3000 [18:43<06:16,  2.36it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2113/3000:  70%|███████   | 2113/3000 [18:43<06:16,  2.36it/s, v_num=1, train_loss_step=1.51e+6, 
train_loss_epoch=1.48e+6]Epoch 2114/3000:  70%|███████   | 2113/3000 [18:43<06:16,  2.36it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.48e+6]Epoch 2114/3000:  70%|███████   | 2114/3000 [18:43<06:09,  2.40it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.48e+6]Epoch 2114/3000:  70%|███████   | 2114/3000 [18:43<06:09,  2.40it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2115/3000:  70%|███████   | 2114/3000 [18:43<06:09,  2.40it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2115/3000:  70%|███████   | 2115/3000 [18:44<06:13,  2.37it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.48e+6]Epoch 2115/3000:  70%|███████   | 2115/3000 [18:44<06:13,  2.37it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2116/3000:  70%|███████   | 2115/3000 [18:44<06:13,  2.37it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2116/3000:  71%|███████   | 2116/3000 [18:44<06:03,  2.43it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2116/3000:  71%|███████   | 2116/3000 [18:44<06:03,  2.43it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2117/3000:  71%|███████   | 2116/3000 [18:44<06:03,  2.43it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2117/3000:  71%|███████   | 2117/3000 [18:45<06:21,  2.32it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2117/3000:  71%|███████   | 2117/3000 [18:45<06:21,  2.32it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2118/3000:  71%|███████   | 2117/3000 [18:45<06:21,  2.32it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2118/3000:  71%|███████   | 2118/3000 [18:45<06:19,  2.33it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2118/3000:  71%|███████   | 2118/3000 [18:45<06:19,  2.33it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2119/3000:  71%|███████   | 2118/3000 
[18:45<06:19,  2.33it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2119/3000:  71%|███████   | 2119/3000 [18:45<06:14,  2.35it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2119/3000:  71%|███████   | 2119/3000 [18:45<06:14,  2.35it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6] Epoch 2120/3000:  71%|███████   | 2119/3000 [18:45<06:14,  2.35it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2120/3000:  71%|███████   | 2120/3000 [18:46<06:14,  2.35it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2120/3000:  71%|███████   | 2120/3000 [18:46<06:14,  2.35it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2121/3000:  71%|███████   | 2120/3000 [18:46<06:14,  2.35it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2121/3000:  71%|███████   | 2121/3000 [18:46<06:08,  2.39it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2121/3000:  71%|███████   | 2121/3000 [18:46<06:08,  2.39it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.48e+6]Epoch 2122/3000:  71%|███████   | 2121/3000 [18:46<06:08,  2.39it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.48e+6]Epoch 2122/3000:  71%|███████   | 2122/3000 [18:47<06:25,  2.27it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.48e+6]Epoch 2122/3000:  71%|███████   | 2122/3000 [18:47<06:25,  2.27it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.48e+6]Epoch 2123/3000:  71%|███████   | 2122/3000 [18:47<06:25,  2.27it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.48e+6]Epoch 2123/3000:  71%|███████   | 2123/3000 [18:47<06:25,  2.28it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.48e+6]Epoch 2123/3000:  71%|███████   | 2123/3000 [18:47<06:25,  2.28it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6] Epoch 2124/3000:  71%|███████   | 2123/3000 [18:47<06:25,  2.28it/s, v_num=1, train_loss_step=1.5e+6, 
train_loss_epoch=1.48e+6]Epoch 2124/3000:  71%|███████   | 2124/3000 [18:48<06:02,  2.42it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.48e+6]Epoch 2124/3000:  71%|███████   | 2124/3000 [18:48<06:02,  2.42it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2125/3000:  71%|███████   | 2124/3000 [18:48<06:02,  2.42it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2125/3000:  71%|███████   | 2125/3000 [18:48<05:59,  2.43it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2125/3000:  71%|███████   | 2125/3000 [18:48<05:59,  2.43it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2126/3000:  71%|███████   | 2125/3000 [18:48<05:59,  2.43it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2126/3000:  71%|███████   | 2126/3000 [18:48<06:12,  2.34it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.48e+6]Epoch 2126/3000:  71%|███████   | 2126/3000 [18:48<06:12,  2.34it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.48e+6]Epoch 2127/3000:  71%|███████   | 2126/3000 [18:48<06:12,  2.34it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.48e+6]Epoch 2127/3000:  71%|███████   | 2127/3000 [18:49<06:04,  2.39it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.48e+6]Epoch 2127/3000:  71%|███████   | 2127/3000 [18:49<06:04,  2.39it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2128/3000:  71%|███████   | 2127/3000 [18:49<06:04,  2.39it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2128/3000:  71%|███████   | 2128/3000 [18:49<05:53,  2.47it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.48e+6]Epoch 2128/3000:  71%|███████   | 2128/3000 [18:49<05:53,  2.47it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2129/3000:  71%|███████   | 2128/3000 [18:49<05:53,  2.47it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2129/3000:  71%|███████   | 2129/3000 
[18:50<05:50,  2.49it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.48e+6]Epoch 2129/3000:  71%|███████   | 2129/3000 [18:50<05:50,  2.49it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2130/3000:  71%|███████   | 2129/3000 [18:50<05:50,  2.49it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2130/3000:  71%|███████   | 2130/3000 [18:50<06:11,  2.34it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.48e+6]Epoch 2130/3000:  71%|███████   | 2130/3000 [18:50<06:11,  2.34it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.47e+6]Epoch 2131/3000:  71%|███████   | 2130/3000 [18:50<06:11,  2.34it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.47e+6]Epoch 2131/3000:  71%|███████   | 2131/3000 [18:50<06:07,  2.37it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.47e+6]Epoch 2131/3000:  71%|███████   | 2131/3000 [18:50<06:07,  2.37it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2132/3000:  71%|███████   | 2131/3000 [18:50<06:07,  2.37it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2132/3000:  71%|███████   | 2132/3000 [18:51<06:24,  2.26it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2132/3000:  71%|███████   | 2132/3000 [18:51<06:24,  2.26it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.47e+6]Epoch 2133/3000:  71%|███████   | 2132/3000 [18:51<06:24,  2.26it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.47e+6]Epoch 2133/3000:  71%|███████   | 2133/3000 [18:51<06:20,  2.28it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.47e+6]Epoch 2133/3000:  71%|███████   | 2133/3000 [18:51<06:20,  2.28it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2134/3000:  71%|███████   | 2133/3000 [18:51<06:20,  2.28it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2134/3000:  71%|███████   | 2134/3000 [18:52<06:27,  2.23it/s, v_num=1, train_loss_step=1.48e+6, 
train_loss_epoch=1.47e+6]Epoch 2134/3000:  71%|███████   | 2134/3000 [18:52<06:27,  2.23it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.47e+6]Epoch 2135/3000:  71%|███████   | 2134/3000 [18:52<06:27,  2.23it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.47e+6]Epoch 2135/3000:  71%|███████   | 2135/3000 [18:52<06:31,  2.21it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.47e+6]Epoch 2135/3000:  71%|███████   | 2135/3000 [18:52<06:31,  2.21it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2136/3000:  71%|███████   | 2135/3000 [18:52<06:31,  2.21it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2136/3000:  71%|███████   | 2136/3000 [18:53<06:08,  2.34it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2136/3000:  71%|███████   | 2136/3000 [18:53<06:08,  2.34it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2137/3000:  71%|███████   | 2136/3000 [18:53<06:08,  2.34it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2137/3000:  71%|███████   | 2137/3000 [18:53<06:01,  2.39it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2137/3000:  71%|███████   | 2137/3000 [18:53<06:01,  2.39it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6] Epoch 2138/3000:  71%|███████   | 2137/3000 [18:53<06:01,  2.39it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6]Epoch 2138/3000:  71%|███████▏  | 2138/3000 [18:54<06:11,  2.32it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6]Epoch 2138/3000:  71%|███████▏  | 2138/3000 [18:54<06:11,  2.32it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2139/3000:  71%|███████▏  | 2138/3000 [18:54<06:11,  2.32it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2139/3000:  71%|███████▏  | 2139/3000 [18:54<05:58,  2.40it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2139/3000:  71%|███████▏  | 2139/3000 
[18:54<05:58,  2.40it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2140/3000:  71%|███████▏  | 2139/3000 [18:54<05:58,  2.40it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2140/3000:  71%|███████▏  | 2140/3000 [18:54<05:34,  2.57it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2140/3000:  71%|███████▏  | 2140/3000 [18:54<05:34,  2.57it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2141/3000:  71%|███████▏  | 2140/3000 [18:54<05:34,  2.57it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2141/3000:  71%|███████▏  | 2141/3000 [18:55<05:56,  2.41it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2141/3000:  71%|███████▏  | 2141/3000 [18:55<05:56,  2.41it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2142/3000:  71%|███████▏  | 2141/3000 [18:55<05:56,  2.41it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2142/3000:  71%|███████▏  | 2142/3000 [18:55<06:12,  2.31it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2142/3000:  71%|███████▏  | 2142/3000 [18:55<06:12,  2.31it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2143/3000:  71%|███████▏  | 2142/3000 [18:55<06:12,  2.31it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2143/3000:  71%|███████▏  | 2143/3000 [18:56<06:19,  2.26it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2143/3000:  71%|███████▏  | 2143/3000 [18:56<06:19,  2.26it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2144/3000:  71%|███████▏  | 2143/3000 [18:56<06:19,  2.26it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2144/3000:  71%|███████▏  | 2144/3000 [18:56<06:12,  2.30it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2144/3000:  71%|███████▏  | 2144/3000 [18:56<06:12,  2.30it/s, v_num=1, train_loss_step=1.44e+6, 
train_loss_epoch=1.47e+6]Epoch 2145/3000:  71%|███████▏  | 2144/3000 [18:56<06:12,  2.30it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2145/3000:  72%|███████▏  | 2145/3000 [18:57<06:19,  2.25it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2145/3000:  72%|███████▏  | 2145/3000 [18:57<06:19,  2.25it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2146/3000:  72%|███████▏  | 2145/3000 [18:57<06:19,  2.25it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2146/3000:  72%|███████▏  | 2146/3000 [18:57<06:22,  2.23it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2146/3000:  72%|███████▏  | 2146/3000 [18:57<06:22,  2.23it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2147/3000:  72%|███████▏  | 2146/3000 [18:57<06:22,  2.23it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2147/3000:  72%|███████▏  | 2147/3000 [18:57<06:22,  2.23it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2147/3000:  72%|███████▏  | 2147/3000 [18:57<06:22,  2.23it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2148/3000:  72%|███████▏  | 2147/3000 [18:57<06:22,  2.23it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2148/3000:  72%|███████▏  | 2148/3000 [18:58<06:28,  2.19it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2148/3000:  72%|███████▏  | 2148/3000 [18:58<06:28,  2.19it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.47e+6]Epoch 2149/3000:  72%|███████▏  | 2148/3000 [18:58<06:28,  2.19it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.47e+6]Epoch 2149/3000:  72%|███████▏  | 2149/3000 [18:58<06:39,  2.13it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.47e+6]Epoch 2149/3000:  72%|███████▏  | 2149/3000 [18:58<06:39,  2.13it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2150/3000:  72%|███████▏  | 2149/3000 
[18:58<06:39,  2.13it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2150/3000:  72%|███████▏  | 2150/3000 [18:59<06:20,  2.23it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2150/3000:  72%|███████▏  | 2150/3000 [18:59<06:20,  2.23it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2151/3000:  72%|███████▏  | 2150/3000 [18:59<06:20,  2.23it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2151/3000:  72%|███████▏  | 2151/3000 [18:59<06:11,  2.28it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2151/3000:  72%|███████▏  | 2151/3000 [18:59<06:11,  2.28it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2152/3000:  72%|███████▏  | 2151/3000 [18:59<06:11,  2.28it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2152/3000:  72%|███████▏  | 2152/3000 [19:00<06:07,  2.31it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2152/3000:  72%|███████▏  | 2152/3000 [19:00<06:07,  2.31it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2153/3000:  72%|███████▏  | 2152/3000 [19:00<06:07,  2.31it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2153/3000:  72%|███████▏  | 2153/3000 [19:00<06:17,  2.24it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2153/3000:  72%|███████▏  | 2153/3000 [19:00<06:17,  2.24it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2154/3000:  72%|███████▏  | 2153/3000 [19:00<06:17,  2.24it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2154/3000:  72%|███████▏  | 2154/3000 [19:01<06:43,  2.10it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2154/3000:  72%|███████▏  | 2154/3000 [19:01<06:43,  2.10it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.47e+6]Epoch 2155/3000:  72%|███████▏  | 2154/3000 [19:01<06:43,  2.10it/s, v_num=1, train_loss_step=1.49e+6, 
train_loss_epoch=1.47e+6]Epoch 2155/3000:  72%|███████▏  | 2155/3000 [19:01<06:45,  2.08it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.47e+6]Epoch 2155/3000:  72%|███████▏  | 2155/3000 [19:01<06:45,  2.08it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2156/3000:  72%|███████▏  | 2155/3000 [19:01<06:45,  2.08it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2156/3000:  72%|███████▏  | 2156/3000 [19:02<06:31,  2.15it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2156/3000:  72%|███████▏  | 2156/3000 [19:02<06:31,  2.15it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2157/3000:  72%|███████▏  | 2156/3000 [19:02<06:31,  2.15it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2157/3000:  72%|███████▏  | 2157/3000 [19:02<06:10,  2.28it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2157/3000:  72%|███████▏  | 2157/3000 [19:02<06:10,  2.28it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2158/3000:  72%|███████▏  | 2157/3000 [19:02<06:10,  2.28it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2158/3000:  72%|███████▏  | 2158/3000 [19:02<06:19,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2158/3000:  72%|███████▏  | 2158/3000 [19:02<06:19,  2.22it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6] Epoch 2159/3000:  72%|███████▏  | 2158/3000 [19:02<06:19,  2.22it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6]Epoch 2159/3000:  72%|███████▏  | 2159/3000 [19:03<06:15,  2.24it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6]Epoch 2159/3000:  72%|███████▏  | 2159/3000 [19:03<06:15,  2.24it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.47e+6]Epoch 2160/3000:  72%|███████▏  | 2159/3000 [19:03<06:15,  2.24it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.47e+6]Epoch 2160/3000:  72%|███████▏  | 2160/3000 
[19:03<06:04,  2.31it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.47e+6]Epoch 2160/3000:  72%|███████▏  | 2160/3000 [19:03<06:04,  2.31it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2161/3000:  72%|███████▏  | 2160/3000 [19:03<06:04,  2.31it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2161/3000:  72%|███████▏  | 2161/3000 [19:04<06:03,  2.31it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2161/3000:  72%|███████▏  | 2161/3000 [19:04<06:03,  2.31it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2162/3000:  72%|███████▏  | 2161/3000 [19:04<06:03,  2.31it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2162/3000:  72%|███████▏  | 2162/3000 [19:04<05:58,  2.34it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2162/3000:  72%|███████▏  | 2162/3000 [19:04<05:58,  2.34it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2163/3000:  72%|███████▏  | 2162/3000 [19:04<05:58,  2.34it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2163/3000:  72%|███████▏  | 2163/3000 [19:05<05:54,  2.36it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2163/3000:  72%|███████▏  | 2163/3000 [19:05<05:54,  2.36it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2164/3000:  72%|███████▏  | 2163/3000 [19:05<05:54,  2.36it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2164/3000:  72%|███████▏  | 2164/3000 [19:05<05:51,  2.38it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2164/3000:  72%|███████▏  | 2164/3000 [19:05<05:51,  2.38it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6] Epoch 2165/3000:  72%|███████▏  | 2164/3000 [19:05<05:51,  2.38it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6]Epoch 2165/3000:  72%|███████▏  | 2165/3000 [19:05<05:58,  2.33it/s, v_num=1, train_loss_step=1.5e+6, 
train_loss_epoch=1.47e+6]Epoch 2165/3000:  72%|███████▏  | 2165/3000 [19:05<05:58,  2.33it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2166/3000:  72%|███████▏  | 2165/3000 [19:05<05:58,  2.33it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2166/3000:  72%|███████▏  | 2166/3000 [19:06<06:12,  2.24it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2166/3000:  72%|███████▏  | 2166/3000 [19:06<06:12,  2.24it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6] Epoch 2167/3000:  72%|███████▏  | 2166/3000 [19:06<06:12,  2.24it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6]Epoch 2167/3000:  72%|███████▏  | 2167/3000 [19:06<05:56,  2.34it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6]Epoch 2167/3000:  72%|███████▏  | 2167/3000 [19:06<05:56,  2.34it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2168/3000:  72%|███████▏  | 2167/3000 [19:06<05:56,  2.34it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2168/3000:  72%|███████▏  | 2168/3000 [19:07<05:56,  2.33it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2168/3000:  72%|███████▏  | 2168/3000 [19:07<05:56,  2.33it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.47e+6]Epoch 2169/3000:  72%|███████▏  | 2168/3000 [19:07<05:56,  2.33it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.47e+6]Epoch 2169/3000:  72%|███████▏  | 2169/3000 [19:07<05:44,  2.41it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.47e+6]Epoch 2169/3000:  72%|███████▏  | 2169/3000 [19:07<05:44,  2.41it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2170/3000:  72%|███████▏  | 2169/3000 [19:07<05:44,  2.41it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2170/3000:  72%|███████▏  | 2170/3000 [19:07<05:39,  2.45it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2170/3000:  72%|███████▏  | 2170/3000 
[19:07<05:39,  2.45it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2171/3000:  72%|███████▏  | 2170/3000 [19:07<05:39,  2.45it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2171/3000:  72%|███████▏  | 2171/3000 [19:08<05:38,  2.45it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2171/3000:  72%|███████▏  | 2171/3000 [19:08<05:38,  2.45it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6] Epoch 2172/3000:  72%|███████▏  | 2171/3000 [19:08<05:38,  2.45it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6]Epoch 2172/3000:  72%|███████▏  | 2172/3000 [19:08<05:50,  2.36it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6]Epoch 2172/3000:  72%|███████▏  | 2172/3000 [19:08<05:50,  2.36it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2173/3000:  72%|███████▏  | 2172/3000 [19:08<05:50,  2.36it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2173/3000:  72%|███████▏  | 2173/3000 [19:09<05:40,  2.43it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2173/3000:  72%|███████▏  | 2173/3000 [19:09<05:40,  2.43it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.47e+6]Epoch 2174/3000:  72%|███████▏  | 2173/3000 [19:09<05:40,  2.43it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.47e+6]Epoch 2174/3000:  72%|███████▏  | 2174/3000 [19:09<05:49,  2.36it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.47e+6]Epoch 2174/3000:  72%|███████▏  | 2174/3000 [19:09<05:49,  2.36it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.47e+6]Epoch 2175/3000:  72%|███████▏  | 2174/3000 [19:09<05:49,  2.36it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.47e+6]Epoch 2175/3000:  72%|███████▎  | 2175/3000 [19:10<05:53,  2.33it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.47e+6]Epoch 2175/3000:  72%|███████▎  | 2175/3000 [19:10<05:53,  2.33it/s, v_num=1, train_loss_step=1.47e+6, 
train_loss_epoch=1.47e+6]Epoch 2176/3000:  72%|███████▎  | 2175/3000 [19:10<05:53,  2.33it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2176/3000:  73%|███████▎  | 2176/3000 [19:10<05:59,  2.29it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2176/3000:  73%|███████▎  | 2176/3000 [19:10<05:59,  2.29it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6] Epoch 2177/3000:  73%|███████▎  | 2176/3000 [19:10<05:59,  2.29it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6]Epoch 2177/3000:  73%|███████▎  | 2177/3000 [19:11<06:04,  2.26it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6]Epoch 2177/3000:  73%|███████▎  | 2177/3000 [19:11<06:04,  2.26it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.47e+6]Epoch 2178/3000:  73%|███████▎  | 2177/3000 [19:11<06:04,  2.26it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.47e+6]Epoch 2178/3000:  73%|███████▎  | 2178/3000 [19:11<06:27,  2.12it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.47e+6]Epoch 2178/3000:  73%|███████▎  | 2178/3000 [19:11<06:27,  2.12it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.47e+6]Epoch 2179/3000:  73%|███████▎  | 2178/3000 [19:11<06:27,  2.12it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.47e+6]Epoch 2179/3000:  73%|███████▎  | 2179/3000 [19:12<06:18,  2.17it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.47e+6]Epoch 2179/3000:  73%|███████▎  | 2179/3000 [19:12<06:18,  2.17it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2180/3000:  73%|███████▎  | 2179/3000 [19:12<06:18,  2.17it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2180/3000:  73%|███████▎  | 2180/3000 [19:12<05:58,  2.29it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2180/3000:  73%|███████▎  | 2180/3000 [19:12<05:58,  2.29it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2181/3000:  73%|███████▎  | 2180/3000 
[19:12<05:58,  2.29it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2181/3000:  73%|███████▎  | 2181/3000 [19:12<05:58,  2.28it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2181/3000:  73%|███████▎  | 2181/3000 [19:12<05:58,  2.28it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2182/3000:  73%|███████▎  | 2181/3000 [19:12<05:58,  2.28it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2182/3000:  73%|███████▎  | 2182/3000 [19:13<05:51,  2.33it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2182/3000:  73%|███████▎  | 2182/3000 [19:13<05:51,  2.33it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.47e+6]Epoch 2183/3000:  73%|███████▎  | 2182/3000 [19:13<05:51,  2.33it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.47e+6]Epoch 2183/3000:  73%|███████▎  | 2183/3000 [19:13<05:46,  2.36it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.47e+6]Epoch 2183/3000:  73%|███████▎  | 2183/3000 [19:13<05:46,  2.36it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2184/3000:  73%|███████▎  | 2183/3000 [19:13<05:46,  2.36it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2184/3000:  73%|███████▎  | 2184/3000 [19:14<05:41,  2.39it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2184/3000:  73%|███████▎  | 2184/3000 [19:14<05:41,  2.39it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2185/3000:  73%|███████▎  | 2184/3000 [19:14<05:41,  2.39it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2185/3000:  73%|███████▎  | 2185/3000 [19:14<05:51,  2.32it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2185/3000:  73%|███████▎  | 2185/3000 [19:14<05:51,  2.32it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2186/3000:  73%|███████▎  | 2185/3000 [19:14<05:51,  2.32it/s, v_num=1, train_loss_step=1.48e+6, 
train_loss_epoch=1.47e+6]Epoch 2186/3000:  73%|███████▎  | 2186/3000 [19:14<05:47,  2.34it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2186/3000:  73%|███████▎  | 2186/3000 [19:14<05:47,  2.34it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.47e+6]Epoch 2187/3000:  73%|███████▎  | 2186/3000 [19:14<05:47,  2.34it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.47e+6]Epoch 2187/3000:  73%|███████▎  | 2187/3000 [19:15<05:47,  2.34it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.47e+6]Epoch 2187/3000:  73%|███████▎  | 2187/3000 [19:15<05:47,  2.34it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2188/3000:  73%|███████▎  | 2187/3000 [19:15<05:47,  2.34it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2188/3000:  73%|███████▎  | 2188/3000 [19:15<05:50,  2.32it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2188/3000:  73%|███████▎  | 2188/3000 [19:15<05:50,  2.32it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2189/3000:  73%|███████▎  | 2188/3000 [19:15<05:50,  2.32it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2189/3000:  73%|███████▎  | 2189/3000 [19:16<05:36,  2.41it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2189/3000:  73%|███████▎  | 2189/3000 [19:16<05:36,  2.41it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.47e+6]Epoch 2190/3000:  73%|███████▎  | 2189/3000 [19:16<05:36,  2.41it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.47e+6]Epoch 2190/3000:  73%|███████▎  | 2190/3000 [19:16<05:46,  2.34it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.47e+6]Epoch 2190/3000:  73%|███████▎  | 2190/3000 [19:16<05:46,  2.34it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2191/3000:  73%|███████▎  | 2190/3000 [19:16<05:46,  2.34it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2191/3000:  73%|███████▎  | 2191/3000 
[19:17<05:52,  2.29it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2191/3000:  73%|███████▎  | 2191/3000 [19:17<05:52,  2.29it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.47e+6]Epoch 2192/3000:  73%|███████▎  | 2191/3000 [19:17<05:52,  2.29it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.47e+6]Epoch 2192/3000:  73%|███████▎  | 2192/3000 [19:17<05:32,  2.43it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.47e+6]Epoch 2192/3000:  73%|███████▎  | 2192/3000 [19:17<05:32,  2.43it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2193/3000:  73%|███████▎  | 2192/3000 [19:17<05:32,  2.43it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2193/3000:  73%|███████▎  | 2193/3000 [19:17<05:47,  2.32it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2193/3000:  73%|███████▎  | 2193/3000 [19:17<05:47,  2.32it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2194/3000:  73%|███████▎  | 2193/3000 [19:17<05:47,  2.32it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2194/3000:  73%|███████▎  | 2194/3000 [19:18<06:07,  2.19it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2194/3000:  73%|███████▎  | 2194/3000 [19:18<06:07,  2.19it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2195/3000:  73%|███████▎  | 2194/3000 [19:18<06:07,  2.19it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2195/3000:  73%|███████▎  | 2195/3000 [19:18<06:16,  2.14it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2195/3000:  73%|███████▎  | 2195/3000 [19:18<06:16,  2.14it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.47e+6]Epoch 2196/3000:  73%|███████▎  | 2195/3000 [19:18<06:16,  2.14it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.47e+6]Epoch 2196/3000:  73%|███████▎  | 2196/3000 [19:19<05:50,  2.29it/s, v_num=1, train_loss_step=1.51e+6, 
train_loss_epoch=1.47e+6]Epoch 2196/3000:  73%|███████▎  | 2196/3000 [19:19<05:50,  2.29it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2197/3000:  73%|███████▎  | 2196/3000 [19:19<05:50,  2.29it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2197/3000:  73%|███████▎  | 2197/3000 [19:19<05:45,  2.32it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2197/3000:  73%|███████▎  | 2197/3000 [19:19<05:45,  2.32it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.47e+6]Epoch 2198/3000:  73%|███████▎  | 2197/3000 [19:19<05:45,  2.32it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.47e+6]Epoch 2198/3000:  73%|███████▎  | 2198/3000 [19:20<05:50,  2.29it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.47e+6]Epoch 2198/3000:  73%|███████▎  | 2198/3000 [19:20<05:50,  2.29it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2199/3000:  73%|███████▎  | 2198/3000 [19:20<05:50,  2.29it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2199/3000:  73%|███████▎  | 2199/3000 [19:20<06:33,  2.04it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2199/3000:  73%|███████▎  | 2199/3000 [19:20<06:33,  2.04it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2200/3000:  73%|███████▎  | 2199/3000 [19:20<06:33,  2.04it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2200/3000:  73%|███████▎  | 2200/3000 [19:21<06:47,  1.96it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2200/3000:  73%|███████▎  | 2200/3000 [19:21<06:47,  1.96it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2201/3000:  73%|███████▎  | 2200/3000 [19:21<06:47,  1.96it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2201/3000:  73%|███████▎  | 2201/3000 [19:21<06:50,  1.94it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2201/3000:  73%|███████▎  | 2201/3000 
[19:21<06:50,  1.94it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2202/3000:  73%|███████▎  | 2201/3000 [19:21<06:50,  1.94it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2202/3000:  73%|███████▎  | 2202/3000 [19:22<06:37,  2.01it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2202/3000:  73%|███████▎  | 2202/3000 [19:22<06:37,  2.01it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2203/3000:  73%|███████▎  | 2202/3000 [19:22<06:37,  2.01it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2203/3000:  73%|███████▎  | 2203/3000 [19:22<06:08,  2.17it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2203/3000:  73%|███████▎  | 2203/3000 [19:22<06:08,  2.17it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6] Epoch 2204/3000:  73%|███████▎  | 2203/3000 [19:22<06:08,  2.17it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6]Epoch 2204/3000:  73%|███████▎  | 2204/3000 [19:23<06:06,  2.17it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6]Epoch 2204/3000:  73%|███████▎  | 2204/3000 [19:23<06:06,  2.17it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2205/3000:  73%|███████▎  | 2204/3000 [19:23<06:06,  2.17it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2205/3000:  74%|███████▎  | 2205/3000 [19:23<06:12,  2.14it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2205/3000:  74%|███████▎  | 2205/3000 [19:23<06:12,  2.14it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2206/3000:  74%|███████▎  | 2205/3000 [19:23<06:12,  2.14it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2206/3000:  74%|███████▎  | 2206/3000 [19:24<06:11,  2.13it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2206/3000:  74%|███████▎  | 2206/3000 [19:24<06:11,  2.13it/s, v_num=1, train_loss_step=1.45e+6, 
train_loss_epoch=1.47e+6]Epoch 2207/3000:  74%|███████▎  | 2206/3000 [19:24<06:11,  2.13it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2207/3000:  74%|███████▎  | 2207/3000 [19:24<06:00,  2.20it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2207/3000:  74%|███████▎  | 2207/3000 [19:24<06:00,  2.20it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2208/3000:  74%|███████▎  | 2207/3000 [19:24<06:00,  2.20it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2208/3000:  74%|███████▎  | 2208/3000 [19:24<05:56,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2208/3000:  74%|███████▎  | 2208/3000 [19:24<05:56,  2.22it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2209/3000:  74%|███████▎  | 2208/3000 [19:24<05:56,  2.22it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2209/3000:  74%|███████▎  | 2209/3000 [19:25<05:58,  2.21it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2209/3000:  74%|███████▎  | 2209/3000 [19:25<05:58,  2.21it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2210/3000:  74%|███████▎  | 2209/3000 [19:25<05:58,  2.21it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2210/3000:  74%|███████▎  | 2210/3000 [19:25<05:58,  2.20it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2210/3000:  74%|███████▎  | 2210/3000 [19:25<05:58,  2.20it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2211/3000:  74%|███████▎  | 2210/3000 [19:25<05:58,  2.20it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2211/3000:  74%|███████▎  | 2211/3000 [19:26<05:57,  2.21it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2211/3000:  74%|███████▎  | 2211/3000 [19:26<05:57,  2.21it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2212/3000:  74%|███████▎  | 2211/3000 
[19:26<05:57,  2.21it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2212/3000:  74%|███████▎  | 2212/3000 [19:26<05:43,  2.29it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2212/3000:  74%|███████▎  | 2212/3000 [19:26<05:43,  2.29it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2213/3000:  74%|███████▎  | 2212/3000 [19:26<05:43,  2.29it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2213/3000:  74%|███████▍  | 2213/3000 [19:27<05:57,  2.20it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2213/3000:  74%|███████▍  | 2213/3000 [19:27<05:57,  2.20it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2214/3000:  74%|███████▍  | 2213/3000 [19:27<05:57,  2.20it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2214/3000:  74%|███████▍  | 2214/3000 [19:27<05:50,  2.24it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2214/3000:  74%|███████▍  | 2214/3000 [19:27<05:50,  2.24it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.47e+6]Epoch 2215/3000:  74%|███████▍  | 2214/3000 [19:27<05:50,  2.24it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.47e+6]Epoch 2215/3000:  74%|███████▍  | 2215/3000 [19:28<05:59,  2.18it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.47e+6]Epoch 2215/3000:  74%|███████▍  | 2215/3000 [19:28<05:59,  2.18it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2216/3000:  74%|███████▍  | 2215/3000 [19:28<05:59,  2.18it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2216/3000:  74%|███████▍  | 2216/3000 [19:28<05:56,  2.20it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2216/3000:  74%|███████▍  | 2216/3000 [19:28<05:56,  2.20it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2217/3000:  74%|███████▍  | 2216/3000 [19:28<05:56,  2.20it/s, v_num=1, train_loss_step=1.47e+6, 
train_loss_epoch=1.47e+6]Epoch 2217/3000:  74%|███████▍  | 2217/3000 [19:29<05:50,  2.23it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2217/3000:  74%|███████▍  | 2217/3000 [19:29<05:50,  2.23it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2218/3000:  74%|███████▍  | 2217/3000 [19:29<05:50,  2.23it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2218/3000:  74%|███████▍  | 2218/3000 [19:29<05:53,  2.21it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2218/3000:  74%|███████▍  | 2218/3000 [19:29<05:53,  2.21it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2219/3000:  74%|███████▍  | 2218/3000 [19:29<05:53,  2.21it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2219/3000:  74%|███████▍  | 2219/3000 [19:29<06:01,  2.16it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2219/3000:  74%|███████▍  | 2219/3000 [19:29<06:01,  2.16it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.47e+6]Epoch 2220/3000:  74%|███████▍  | 2219/3000 [19:29<06:01,  2.16it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.47e+6]Epoch 2220/3000:  74%|███████▍  | 2220/3000 [19:30<06:07,  2.12it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.47e+6]Epoch 2220/3000:  74%|███████▍  | 2220/3000 [19:30<06:07,  2.12it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.47e+6]Epoch 2221/3000:  74%|███████▍  | 2220/3000 [19:30<06:07,  2.12it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.47e+6]Epoch 2221/3000:  74%|███████▍  | 2221/3000 [19:30<06:11,  2.09it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.47e+6]Epoch 2221/3000:  74%|███████▍  | 2221/3000 [19:30<06:11,  2.09it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2222/3000:  74%|███████▍  | 2221/3000 [19:30<06:11,  2.09it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2222/3000:  74%|███████▍  | 2222/3000 
[19:31<06:09,  2.11it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2222/3000:  74%|███████▍  | 2222/3000 [19:31<06:09,  2.11it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2223/3000:  74%|███████▍  | 2222/3000 [19:31<06:09,  2.11it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2223/3000:  74%|███████▍  | 2223/3000 [19:31<05:57,  2.17it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.47e+6]Epoch 2223/3000:  74%|███████▍  | 2223/3000 [19:31<05:57,  2.17it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2224/3000:  74%|███████▍  | 2223/3000 [19:31<05:57,  2.17it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2224/3000:  74%|███████▍  | 2224/3000 [19:32<05:42,  2.26it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.47e+6]Epoch 2224/3000:  74%|███████▍  | 2224/3000 [19:32<05:42,  2.26it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.47e+6]Epoch 2225/3000:  74%|███████▍  | 2224/3000 [19:32<05:42,  2.26it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.47e+6]Epoch 2225/3000:  74%|███████▍  | 2225/3000 [19:32<05:57,  2.17it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.47e+6]Epoch 2225/3000:  74%|███████▍  | 2225/3000 [19:32<05:57,  2.17it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2226/3000:  74%|███████▍  | 2225/3000 [19:32<05:57,  2.17it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2226/3000:  74%|███████▍  | 2226/3000 [19:33<05:57,  2.16it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2226/3000:  74%|███████▍  | 2226/3000 [19:33<05:57,  2.16it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2227/3000:  74%|███████▍  | 2226/3000 [19:33<05:57,  2.16it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2227/3000:  74%|███████▍  | 2227/3000 [19:33<06:08,  2.10it/s, v_num=1, train_loss_step=1.46e+6, 
train_loss_epoch=1.47e+6]Epoch 2227/3000:  74%|███████▍  | 2227/3000 [19:33<06:08,  2.10it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2228/3000:  74%|███████▍  | 2227/3000 [19:33<06:08,  2.10it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2228/3000:  74%|███████▍  | 2228/3000 [19:34<05:29,  2.34it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2228/3000:  74%|███████▍  | 2228/3000 [19:34<05:29,  2.34it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.47e+6]Epoch 2229/3000:  74%|███████▍  | 2228/3000 [19:34<05:29,  2.34it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.47e+6]Epoch 2229/3000:  74%|███████▍  | 2229/3000 [19:34<05:31,  2.32it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.47e+6]Epoch 2229/3000:  74%|███████▍  | 2229/3000 [19:34<05:31,  2.32it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6] Epoch 2230/3000:  74%|███████▍  | 2229/3000 [19:34<05:31,  2.32it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6]Epoch 2230/3000:  74%|███████▍  | 2230/3000 [19:34<05:29,  2.34it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.47e+6]Epoch 2230/3000:  74%|███████▍  | 2230/3000 [19:34<05:29,  2.34it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2231/3000:  74%|███████▍  | 2230/3000 [19:34<05:29,  2.34it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2231/3000:  74%|███████▍  | 2231/3000 [19:35<05:29,  2.33it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2231/3000:  74%|███████▍  | 2231/3000 [19:35<05:29,  2.33it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2232/3000:  74%|███████▍  | 2231/3000 [19:35<05:29,  2.33it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2232/3000:  74%|███████▍  | 2232/3000 [19:35<05:28,  2.34it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2232/3000:  74%|███████▍  | 2232/3000 
[19:35<05:28,  2.34it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2233/3000:  74%|███████▍  | 2232/3000 [19:35<05:28,  2.34it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2233/3000:  74%|███████▍  | 2233/3000 [19:36<05:41,  2.24it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2233/3000:  74%|███████▍  | 2233/3000 [19:36<05:41,  2.24it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2234/3000:  74%|███████▍  | 2233/3000 [19:36<05:41,  2.24it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2234/3000:  74%|███████▍  | 2234/3000 [19:36<05:27,  2.34it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2234/3000:  74%|███████▍  | 2234/3000 [19:36<05:27,  2.34it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.47e+6]Epoch 2235/3000:  74%|███████▍  | 2234/3000 [19:36<05:27,  2.34it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.47e+6]Epoch 2235/3000:  74%|███████▍  | 2235/3000 [19:37<05:33,  2.29it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.47e+6]Epoch 2235/3000:  74%|███████▍  | 2235/3000 [19:37<05:33,  2.29it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2236/3000:  74%|███████▍  | 2235/3000 [19:37<05:33,  2.29it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2236/3000:  75%|███████▍  | 2236/3000 [19:37<05:35,  2.28it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.47e+6]Epoch 2236/3000:  75%|███████▍  | 2236/3000 [19:37<05:35,  2.28it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2237/3000:  75%|███████▍  | 2236/3000 [19:37<05:35,  2.28it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2237/3000:  75%|███████▍  | 2237/3000 [19:37<05:27,  2.33it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2237/3000:  75%|███████▍  | 2237/3000 [19:37<05:27,  2.33it/s, v_num=1, train_loss_step=1.48e+6, 
train_loss_epoch=1.47e+6]Epoch 2238/3000:  75%|███████▍  | 2237/3000 [19:37<05:27,  2.33it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2238/3000:  75%|███████▍  | 2238/3000 [19:38<05:35,  2.27it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.47e+6]Epoch 2238/3000:  75%|███████▍  | 2238/3000 [19:38<05:35,  2.27it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2239/3000:  75%|███████▍  | 2238/3000 [19:38<05:35,  2.27it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2239/3000:  75%|███████▍  | 2239/3000 [19:38<05:42,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2239/3000:  75%|███████▍  | 2239/3000 [19:38<05:42,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2240/3000:  75%|███████▍  | 2239/3000 [19:38<05:42,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2240/3000:  75%|███████▍  | 2240/3000 [19:39<06:00,  2.11it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2240/3000:  75%|███████▍  | 2240/3000 [19:39<06:00,  2.11it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.47e+6]Epoch 2241/3000:  75%|███████▍  | 2240/3000 [19:39<06:00,  2.11it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.47e+6]Epoch 2241/3000:  75%|███████▍  | 2241/3000 [19:39<05:42,  2.22it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.47e+6]Epoch 2241/3000:  75%|███████▍  | 2241/3000 [19:39<05:42,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2242/3000:  75%|███████▍  | 2241/3000 [19:39<05:42,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2242/3000:  75%|███████▍  | 2242/3000 [19:40<05:24,  2.33it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.47e+6]Epoch 2242/3000:  75%|███████▍  | 2242/3000 [19:40<05:24,  2.33it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2243/3000:  75%|███████▍  | 2242/3000 
[19:40<05:24,  2.33it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2243/3000:  75%|███████▍  | 2243/3000 [19:40<05:18,  2.37it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2243/3000:  75%|███████▍  | 2243/3000 [19:40<05:18,  2.37it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2244/3000:  75%|███████▍  | 2243/3000 [19:40<05:18,  2.37it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2244/3000:  75%|███████▍  | 2244/3000 [19:41<05:27,  2.31it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2244/3000:  75%|███████▍  | 2244/3000 [19:41<05:27,  2.31it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2245/3000:  75%|███████▍  | 2244/3000 [19:41<05:27,  2.31it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2245/3000:  75%|███████▍  | 2245/3000 [19:41<05:31,  2.28it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2245/3000:  75%|███████▍  | 2245/3000 [19:41<05:31,  2.28it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2246/3000:  75%|███████▍  | 2245/3000 [19:41<05:31,  2.28it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2246/3000:  75%|███████▍  | 2246/3000 [19:42<05:55,  2.12it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2246/3000:  75%|███████▍  | 2246/3000 [19:42<05:55,  2.12it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2247/3000:  75%|███████▍  | 2246/3000 [19:42<05:55,  2.12it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2247/3000:  75%|███████▍  | 2247/3000 [19:42<05:45,  2.18it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2247/3000:  75%|███████▍  | 2247/3000 [19:42<05:45,  2.18it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6] Epoch 2248/3000:  75%|███████▍  | 2247/3000 [19:42<05:45,  2.18it/s, v_num=1, train_loss_step=1.5e+6, 
train_loss_epoch=1.46e+6]Epoch 2248/3000:  75%|███████▍  | 2248/3000 [19:42<05:53,  2.13it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6]Epoch 2248/3000:  75%|███████▍  | 2248/3000 [19:42<05:53,  2.13it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2249/3000:  75%|███████▍  | 2248/3000 [19:42<05:53,  2.13it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2249/3000:  75%|███████▍  | 2249/3000 [19:43<05:49,  2.15it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2249/3000:  75%|███████▍  | 2249/3000 [19:43<05:49,  2.15it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2250/3000:  75%|███████▍  | 2249/3000 [19:43<05:49,  2.15it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2250/3000:  75%|███████▌  | 2250/3000 [19:43<05:36,  2.23it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2250/3000:  75%|███████▌  | 2250/3000 [19:43<05:36,  2.23it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2251/3000:  75%|███████▌  | 2250/3000 [19:43<05:36,  2.23it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2251/3000:  75%|███████▌  | 2251/3000 [19:44<05:33,  2.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2251/3000:  75%|███████▌  | 2251/3000 [19:44<05:33,  2.24it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.46e+6]Epoch 2252/3000:  75%|███████▌  | 2251/3000 [19:44<05:33,  2.24it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.46e+6]Epoch 2252/3000:  75%|███████▌  | 2252/3000 [19:44<05:34,  2.24it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.46e+6]Epoch 2252/3000:  75%|███████▌  | 2252/3000 [19:44<05:34,  2.24it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2253/3000:  75%|███████▌  | 2252/3000 [19:44<05:34,  2.24it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2253/3000:  75%|███████▌  | 2253/3000 
[19:45<05:31,  2.25it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2253/3000:  75%|███████▌  | 2253/3000 [19:45<05:31,  2.25it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6] Epoch 2254/3000:  75%|███████▌  | 2253/3000 [19:45<05:31,  2.25it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6]Epoch 2254/3000:  75%|███████▌  | 2254/3000 [19:45<05:34,  2.23it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6]Epoch 2254/3000:  75%|███████▌  | 2254/3000 [19:45<05:34,  2.23it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2255/3000:  75%|███████▌  | 2254/3000 [19:45<05:34,  2.23it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2255/3000:  75%|███████▌  | 2255/3000 [19:46<05:33,  2.23it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2255/3000:  75%|███████▌  | 2255/3000 [19:46<05:33,  2.23it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2256/3000:  75%|███████▌  | 2255/3000 [19:46<05:33,  2.23it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2256/3000:  75%|███████▌  | 2256/3000 [19:46<05:24,  2.30it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2256/3000:  75%|███████▌  | 2256/3000 [19:46<05:24,  2.30it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2257/3000:  75%|███████▌  | 2256/3000 [19:46<05:24,  2.30it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2257/3000:  75%|███████▌  | 2257/3000 [19:46<05:32,  2.23it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2257/3000:  75%|███████▌  | 2257/3000 [19:46<05:32,  2.23it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2258/3000:  75%|███████▌  | 2257/3000 [19:46<05:32,  2.23it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2258/3000:  75%|███████▌  | 2258/3000 [19:47<05:29,  2.25it/s, v_num=1, train_loss_step=1.46e+6, 
train_loss_epoch=1.46e+6]Epoch 2258/3000:  75%|███████▌  | 2258/3000 [19:47<05:29,  2.25it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2259/3000:  75%|███████▌  | 2258/3000 [19:47<05:29,  2.25it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2259/3000:  75%|███████▌  | 2259/3000 [19:47<05:26,  2.27it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2259/3000:  75%|███████▌  | 2259/3000 [19:47<05:26,  2.27it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2260/3000:  75%|███████▌  | 2259/3000 [19:47<05:26,  2.27it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2260/3000:  75%|███████▌  | 2260/3000 [19:48<05:33,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2260/3000:  75%|███████▌  | 2260/3000 [19:48<05:33,  2.22it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2261/3000:  75%|███████▌  | 2260/3000 [19:48<05:33,  2.22it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2261/3000:  75%|███████▌  | 2261/3000 [19:48<05:27,  2.26it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2261/3000:  75%|███████▌  | 2261/3000 [19:48<05:27,  2.26it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2262/3000:  75%|███████▌  | 2261/3000 [19:48<05:27,  2.26it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2262/3000:  75%|███████▌  | 2262/3000 [19:49<05:27,  2.25it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2262/3000:  75%|███████▌  | 2262/3000 [19:49<05:27,  2.25it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2263/3000:  75%|███████▌  | 2262/3000 [19:49<05:27,  2.25it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2263/3000:  75%|███████▌  | 2263/3000 [19:49<05:24,  2.27it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2263/3000:  75%|███████▌  | 2263/3000 
[19:49<05:24,  2.27it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.46e+6]Epoch 2264/3000:  75%|███████▌  | 2263/3000 [19:49<05:24,  2.27it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.46e+6]Epoch 2264/3000:  75%|███████▌  | 2264/3000 [19:50<05:19,  2.30it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.46e+6]Epoch 2264/3000:  75%|███████▌  | 2264/3000 [19:50<05:19,  2.30it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2265/3000:  75%|███████▌  | 2264/3000 [19:50<05:19,  2.30it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2265/3000:  76%|███████▌  | 2265/3000 [19:50<05:16,  2.32it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2265/3000:  76%|███████▌  | 2265/3000 [19:50<05:16,  2.32it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6] Epoch 2266/3000:  76%|███████▌  | 2265/3000 [19:50<05:16,  2.32it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6]Epoch 2266/3000:  76%|███████▌  | 2266/3000 [19:50<05:16,  2.32it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6]Epoch 2266/3000:  76%|███████▌  | 2266/3000 [19:50<05:16,  2.32it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2267/3000:  76%|███████▌  | 2266/3000 [19:50<05:16,  2.32it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2267/3000:  76%|███████▌  | 2267/3000 [19:51<05:29,  2.22it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2267/3000:  76%|███████▌  | 2267/3000 [19:51<05:29,  2.22it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2268/3000:  76%|███████▌  | 2267/3000 [19:51<05:29,  2.22it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2268/3000:  76%|███████▌  | 2268/3000 [19:51<04:58,  2.45it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2268/3000:  76%|███████▌  | 2268/3000 [19:51<04:58,  2.45it/s, v_num=1, train_loss_step=1.43e+6, 
train_loss_epoch=1.46e+6]Epoch 2269/3000:  76%|███████▌  | 2268/3000 [19:51<04:58,  2.45it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2269/3000:  76%|███████▌  | 2269/3000 [19:52<04:54,  2.48it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2269/3000:  76%|███████▌  | 2269/3000 [19:52<04:54,  2.48it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2270/3000:  76%|███████▌  | 2269/3000 [19:52<04:54,  2.48it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2270/3000:  76%|███████▌  | 2270/3000 [19:52<05:09,  2.36it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2270/3000:  76%|███████▌  | 2270/3000 [19:52<05:09,  2.36it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2271/3000:  76%|███████▌  | 2270/3000 [19:52<05:09,  2.36it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2271/3000:  76%|███████▌  | 2271/3000 [19:52<05:11,  2.34it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2271/3000:  76%|███████▌  | 2271/3000 [19:52<05:11,  2.34it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2272/3000:  76%|███████▌  | 2271/3000 [19:52<05:11,  2.34it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2272/3000:  76%|███████▌  | 2272/3000 [19:53<05:24,  2.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2272/3000:  76%|███████▌  | 2272/3000 [19:53<05:24,  2.24it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2273/3000:  76%|███████▌  | 2272/3000 [19:53<05:24,  2.24it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2273/3000:  76%|███████▌  | 2273/3000 [19:53<05:00,  2.42it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2273/3000:  76%|███████▌  | 2273/3000 [19:53<05:00,  2.42it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2274/3000:  76%|███████▌  | 2273/3000 
[19:53<05:00,  2.42it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2274/3000:  76%|███████▌  | 2274/3000 [19:54<04:48,  2.51it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2274/3000:  76%|███████▌  | 2274/3000 [19:54<04:48,  2.51it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2275/3000:  76%|███████▌  | 2274/3000 [19:54<04:48,  2.51it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2275/3000:  76%|███████▌  | 2275/3000 [19:54<04:47,  2.52it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2275/3000:  76%|███████▌  | 2275/3000 [19:54<04:47,  2.52it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2276/3000:  76%|███████▌  | 2275/3000 [19:54<04:47,  2.52it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2276/3000:  76%|███████▌  | 2276/3000 [19:55<04:56,  2.44it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2276/3000:  76%|███████▌  | 2276/3000 [19:55<04:56,  2.44it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6] Epoch 2277/3000:  76%|███████▌  | 2276/3000 [19:55<04:56,  2.44it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6]Epoch 2277/3000:  76%|███████▌  | 2277/3000 [19:55<04:55,  2.45it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6]Epoch 2277/3000:  76%|███████▌  | 2277/3000 [19:55<04:55,  2.45it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2278/3000:  76%|███████▌  | 2277/3000 [19:55<04:55,  2.45it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2278/3000:  76%|███████▌  | 2278/3000 [19:55<04:47,  2.51it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2278/3000:  76%|███████▌  | 2278/3000 [19:55<04:47,  2.51it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2279/3000:  76%|███████▌  | 2278/3000 [19:55<04:47,  2.51it/s, v_num=1, train_loss_step=1.45e+6, 
train_loss_epoch=1.46e+6]Epoch 2279/3000:  76%|███████▌  | 2279/3000 [19:56<04:55,  2.44it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2279/3000:  76%|███████▌  | 2279/3000 [19:56<04:55,  2.44it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2280/3000:  76%|███████▌  | 2279/3000 [19:56<04:55,  2.44it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2280/3000:  76%|███████▌  | 2280/3000 [19:56<04:52,  2.46it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2280/3000:  76%|███████▌  | 2280/3000 [19:56<04:52,  2.46it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2281/3000:  76%|███████▌  | 2280/3000 [19:56<04:52,  2.46it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2281/3000:  76%|███████▌  | 2281/3000 [19:57<05:04,  2.36it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2281/3000:  76%|███████▌  | 2281/3000 [19:57<05:04,  2.36it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2282/3000:  76%|███████▌  | 2281/3000 [19:57<05:04,  2.36it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2282/3000:  76%|███████▌  | 2282/3000 [19:57<05:19,  2.25it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2282/3000:  76%|███████▌  | 2282/3000 [19:57<05:19,  2.25it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2283/3000:  76%|███████▌  | 2282/3000 [19:57<05:19,  2.25it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2283/3000:  76%|███████▌  | 2283/3000 [19:58<05:26,  2.19it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2283/3000:  76%|███████▌  | 2283/3000 [19:58<05:26,  2.19it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2284/3000:  76%|███████▌  | 2283/3000 [19:58<05:26,  2.19it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2284/3000:  76%|███████▌  | 2284/3000 
[19:58<05:22,  2.22it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2284/3000:  76%|███████▌  | 2284/3000 [19:58<05:22,  2.22it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2285/3000:  76%|███████▌  | 2284/3000 [19:58<05:22,  2.22it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2285/3000:  76%|███████▌  | 2285/3000 [19:59<05:37,  2.12it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2285/3000:  76%|███████▌  | 2285/3000 [19:59<05:37,  2.12it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2286/3000:  76%|███████▌  | 2285/3000 [19:59<05:37,  2.12it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2286/3000:  76%|███████▌  | 2286/3000 [19:59<05:27,  2.18it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2286/3000:  76%|███████▌  | 2286/3000 [19:59<05:27,  2.18it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2287/3000:  76%|███████▌  | 2286/3000 [19:59<05:27,  2.18it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2287/3000:  76%|███████▌  | 2287/3000 [19:59<05:19,  2.23it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2287/3000:  76%|███████▌  | 2287/3000 [19:59<05:19,  2.23it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2288/3000:  76%|███████▌  | 2287/3000 [19:59<05:19,  2.23it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2288/3000:  76%|███████▋  | 2288/3000 [20:00<05:09,  2.30it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2288/3000:  76%|███████▋  | 2288/3000 [20:00<05:09,  2.30it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2289/3000:  76%|███████▋  | 2288/3000 [20:00<05:09,  2.30it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2289/3000:  76%|███████▋  | 2289/3000 [20:00<05:00,  2.37it/s, v_num=1, train_loss_step=1.49e+6, 
train_loss_epoch=1.46e+6]Epoch 2289/3000:  76%|███████▋  | 2289/3000 [20:00<05:00,  2.37it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2290/3000:  76%|███████▋  | 2289/3000 [20:00<05:00,  2.37it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2290/3000:  76%|███████▋  | 2290/3000 [20:01<05:03,  2.34it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2290/3000:  76%|███████▋  | 2290/3000 [20:01<05:03,  2.34it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2291/3000:  76%|███████▋  | 2290/3000 [20:01<05:03,  2.34it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2291/3000:  76%|███████▋  | 2291/3000 [20:01<05:11,  2.28it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2291/3000:  76%|███████▋  | 2291/3000 [20:01<05:11,  2.28it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2292/3000:  76%|███████▋  | 2291/3000 [20:01<05:11,  2.28it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2292/3000:  76%|███████▋  | 2292/3000 [20:02<05:12,  2.26it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2292/3000:  76%|███████▋  | 2292/3000 [20:02<05:12,  2.26it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2293/3000:  76%|███████▋  | 2292/3000 [20:02<05:12,  2.26it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2293/3000:  76%|███████▋  | 2293/3000 [20:02<05:13,  2.26it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2293/3000:  76%|███████▋  | 2293/3000 [20:02<05:13,  2.26it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6] Epoch 2294/3000:  76%|███████▋  | 2293/3000 [20:02<05:13,  2.26it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6]Epoch 2294/3000:  76%|███████▋  | 2294/3000 [20:02<05:02,  2.34it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6]Epoch 2294/3000:  76%|███████▋  | 2294/3000 
[20:02<05:02,  2.34it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2295/3000:  76%|███████▋  | 2294/3000 [20:02<05:02,  2.34it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2295/3000:  76%|███████▋  | 2295/3000 [20:03<05:08,  2.29it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2295/3000:  76%|███████▋  | 2295/3000 [20:03<05:08,  2.29it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2296/3000:  76%|███████▋  | 2295/3000 [20:03<05:08,  2.29it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2296/3000:  77%|███████▋  | 2296/3000 [20:03<04:45,  2.47it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2296/3000:  77%|███████▋  | 2296/3000 [20:03<04:45,  2.47it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2297/3000:  77%|███████▋  | 2296/3000 [20:03<04:45,  2.47it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2297/3000:  77%|███████▋  | 2297/3000 [20:04<04:56,  2.37it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2297/3000:  77%|███████▋  | 2297/3000 [20:04<04:56,  2.37it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2298/3000:  77%|███████▋  | 2297/3000 [20:04<04:56,  2.37it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2298/3000:  77%|███████▋  | 2298/3000 [20:04<04:34,  2.55it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2298/3000:  77%|███████▋  | 2298/3000 [20:04<04:34,  2.55it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2299/3000:  77%|███████▋  | 2298/3000 [20:04<04:34,  2.55it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2299/3000:  77%|███████▋  | 2299/3000 [20:04<04:41,  2.49it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2299/3000:  77%|███████▋  | 2299/3000 [20:04<04:41,  2.49it/s, v_num=1, train_loss_step=1.46e+6, 
train_loss_epoch=1.46e+6]Epoch 2300/3000:  77%|███████▋  | 2299/3000 [20:04<04:41,  2.49it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2300/3000:  77%|███████▋  | 2300/3000 [20:05<04:51,  2.40it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2300/3000:  77%|███████▋  | 2300/3000 [20:05<04:51,  2.40it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2301/3000:  77%|███████▋  | 2300/3000 [20:05<04:51,  2.40it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2301/3000:  77%|███████▋  | 2301/3000 [20:05<04:52,  2.39it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2301/3000:  77%|███████▋  | 2301/3000 [20:05<04:52,  2.39it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2302/3000:  77%|███████▋  | 2301/3000 [20:05<04:52,  2.39it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2302/3000:  77%|███████▋  | 2302/3000 [20:06<04:52,  2.39it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2302/3000:  77%|███████▋  | 2302/3000 [20:06<04:52,  2.39it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2303/3000:  77%|███████▋  | 2302/3000 [20:06<04:52,  2.39it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2303/3000:  77%|███████▋  | 2303/3000 [20:06<04:52,  2.38it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2303/3000:  77%|███████▋  | 2303/3000 [20:06<04:52,  2.38it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2304/3000:  77%|███████▋  | 2303/3000 [20:06<04:52,  2.38it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2304/3000:  77%|███████▋  | 2304/3000 [20:07<05:01,  2.31it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2304/3000:  77%|███████▋  | 2304/3000 [20:07<05:01,  2.31it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2305/3000:  77%|███████▋  | 2304/3000 
[20:07<05:01,  2.31it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2305/3000:  77%|███████▋  | 2305/3000 [20:07<05:12,  2.23it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2305/3000:  77%|███████▋  | 2305/3000 [20:07<05:12,  2.23it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.46e+6]Epoch 2306/3000:  77%|███████▋  | 2305/3000 [20:07<05:12,  2.23it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.46e+6]Epoch 2306/3000:  77%|███████▋  | 2306/3000 [20:07<05:07,  2.25it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.46e+6]Epoch 2306/3000:  77%|███████▋  | 2306/3000 [20:07<05:07,  2.25it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2307/3000:  77%|███████▋  | 2306/3000 [20:07<05:07,  2.25it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2307/3000:  77%|███████▋  | 2307/3000 [20:08<05:15,  2.20it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2307/3000:  77%|███████▋  | 2307/3000 [20:08<05:15,  2.20it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.46e+6] Epoch 2308/3000:  77%|███████▋  | 2307/3000 [20:08<05:15,  2.20it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.46e+6]Epoch 2308/3000:  77%|███████▋  | 2308/3000 [20:08<05:13,  2.21it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.46e+6]Epoch 2308/3000:  77%|███████▋  | 2308/3000 [20:08<05:13,  2.21it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2309/3000:  77%|███████▋  | 2308/3000 [20:08<05:13,  2.21it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2309/3000:  77%|███████▋  | 2309/3000 [20:09<05:20,  2.16it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2309/3000:  77%|███████▋  | 2309/3000 [20:09<05:20,  2.16it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2310/3000:  77%|███████▋  | 2309/3000 [20:09<05:20,  2.16it/s, v_num=1, train_loss_step=1.45e+6, 
train_loss_epoch=1.46e+6]Epoch 2310/3000:  77%|███████▋  | 2310/3000 [20:09<05:12,  2.21it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2310/3000:  77%|███████▋  | 2310/3000 [20:09<05:12,  2.21it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2311/3000:  77%|███████▋  | 2310/3000 [20:09<05:12,  2.21it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2311/3000:  77%|███████▋  | 2311/3000 [20:10<05:12,  2.20it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2311/3000:  77%|███████▋  | 2311/3000 [20:10<05:12,  2.20it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2312/3000:  77%|███████▋  | 2311/3000 [20:10<05:12,  2.20it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2312/3000:  77%|███████▋  | 2312/3000 [20:10<05:07,  2.24it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2312/3000:  77%|███████▋  | 2312/3000 [20:10<05:07,  2.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2313/3000:  77%|███████▋  | 2312/3000 [20:10<05:07,  2.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2313/3000:  77%|███████▋  | 2313/3000 [20:11<05:05,  2.25it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2313/3000:  77%|███████▋  | 2313/3000 [20:11<05:05,  2.25it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2314/3000:  77%|███████▋  | 2313/3000 [20:11<05:05,  2.25it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2314/3000:  77%|███████▋  | 2314/3000 [20:11<04:52,  2.34it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2314/3000:  77%|███████▋  | 2314/3000 [20:11<04:52,  2.34it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2315/3000:  77%|███████▋  | 2314/3000 [20:11<04:52,  2.34it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2315/3000:  77%|███████▋  | 2315/3000 
[20:11<04:35,  2.48it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2315/3000:  77%|███████▋  | 2315/3000 [20:11<04:35,  2.48it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2316/3000:  77%|███████▋  | 2315/3000 [20:11<04:35,  2.48it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2316/3000:  77%|███████▋  | 2316/3000 [20:12<04:48,  2.37it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2316/3000:  77%|███████▋  | 2316/3000 [20:12<04:48,  2.37it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2317/3000:  77%|███████▋  | 2316/3000 [20:12<04:48,  2.37it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2317/3000:  77%|███████▋  | 2317/3000 [20:12<05:00,  2.27it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2317/3000:  77%|███████▋  | 2317/3000 [20:12<05:00,  2.27it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2318/3000:  77%|███████▋  | 2317/3000 [20:12<05:00,  2.27it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2318/3000:  77%|███████▋  | 2318/3000 [20:13<05:02,  2.25it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2318/3000:  77%|███████▋  | 2318/3000 [20:13<05:02,  2.25it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2319/3000:  77%|███████▋  | 2318/3000 [20:13<05:02,  2.25it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2319/3000:  77%|███████▋  | 2319/3000 [20:13<05:10,  2.19it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2319/3000:  77%|███████▋  | 2319/3000 [20:13<05:10,  2.19it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2320/3000:  77%|███████▋  | 2319/3000 [20:13<05:10,  2.19it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2320/3000:  77%|███████▋  | 2320/3000 [20:14<05:11,  2.18it/s, v_num=1, train_loss_step=1.49e+6, 
train_loss_epoch=1.46e+6]Epoch 2320/3000:  77%|███████▋  | 2320/3000 [20:14<05:11,  2.18it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2321/3000:  77%|███████▋  | 2320/3000 [20:14<05:11,  2.18it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2321/3000:  77%|███████▋  | 2321/3000 [20:14<05:11,  2.18it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2321/3000:  77%|███████▋  | 2321/3000 [20:14<05:11,  2.18it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2322/3000:  77%|███████▋  | 2321/3000 [20:14<05:11,  2.18it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2322/3000:  77%|███████▋  | 2322/3000 [20:15<05:02,  2.24it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2322/3000:  77%|███████▋  | 2322/3000 [20:15<05:02,  2.24it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2323/3000:  77%|███████▋  | 2322/3000 [20:15<05:02,  2.24it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2323/3000:  77%|███████▋  | 2323/3000 [20:15<04:56,  2.29it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2323/3000:  77%|███████▋  | 2323/3000 [20:15<04:56,  2.29it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2324/3000:  77%|███████▋  | 2323/3000 [20:15<04:56,  2.29it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2324/3000:  77%|███████▋  | 2324/3000 [20:15<05:01,  2.24it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2324/3000:  77%|███████▋  | 2324/3000 [20:15<05:01,  2.24it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2325/3000:  77%|███████▋  | 2324/3000 [20:15<05:01,  2.24it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2325/3000:  78%|███████▊  | 2325/3000 [20:16<05:05,  2.21it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2325/3000:  78%|███████▊  | 2325/3000 
[20:16<05:05,  2.21it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2326/3000:  78%|███████▊  | 2325/3000 [20:16<05:05,  2.21it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2326/3000:  78%|███████▊  | 2326/3000 [20:16<05:01,  2.24it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2326/3000:  78%|███████▊  | 2326/3000 [20:16<05:01,  2.24it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2327/3000:  78%|███████▊  | 2326/3000 [20:16<05:01,  2.24it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2327/3000:  78%|███████▊  | 2327/3000 [20:17<05:06,  2.19it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2327/3000:  78%|███████▊  | 2327/3000 [20:17<05:06,  2.19it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2328/3000:  78%|███████▊  | 2327/3000 [20:17<05:06,  2.19it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2328/3000:  78%|███████▊  | 2328/3000 [20:17<05:12,  2.15it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2328/3000:  78%|███████▊  | 2328/3000 [20:17<05:12,  2.15it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2329/3000:  78%|███████▊  | 2328/3000 [20:17<05:12,  2.15it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2329/3000:  78%|███████▊  | 2329/3000 [20:18<05:07,  2.18it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2329/3000:  78%|███████▊  | 2329/3000 [20:18<05:07,  2.18it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6] Epoch 2330/3000:  78%|███████▊  | 2329/3000 [20:18<05:07,  2.18it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6]Epoch 2330/3000:  78%|███████▊  | 2330/3000 [20:18<05:20,  2.09it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6]Epoch 2330/3000:  78%|███████▊  | 2330/3000 [20:18<05:20,  2.09it/s, v_num=1, train_loss_step=1.49e+6, 
train_loss_epoch=1.46e+6]Epoch 2331/3000:  78%|███████▊  | 2330/3000 [20:18<05:20,  2.09it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2331/3000:  78%|███████▊  | 2331/3000 [20:19<05:12,  2.14it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2331/3000:  78%|███████▊  | 2331/3000 [20:19<05:12,  2.14it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2332/3000:  78%|███████▊  | 2331/3000 [20:19<05:12,  2.14it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2332/3000:  78%|███████▊  | 2332/3000 [20:19<04:53,  2.28it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2332/3000:  78%|███████▊  | 2332/3000 [20:19<04:53,  2.28it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2333/3000:  78%|███████▊  | 2332/3000 [20:19<04:53,  2.28it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2333/3000:  78%|███████▊  | 2333/3000 [20:20<04:57,  2.25it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2333/3000:  78%|███████▊  | 2333/3000 [20:20<04:57,  2.25it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2334/3000:  78%|███████▊  | 2333/3000 [20:20<04:57,  2.25it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2334/3000:  78%|███████▊  | 2334/3000 [20:20<04:52,  2.28it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2334/3000:  78%|███████▊  | 2334/3000 [20:20<04:52,  2.28it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.46e+6]Epoch 2335/3000:  78%|███████▊  | 2334/3000 [20:20<04:52,  2.28it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.46e+6]Epoch 2335/3000:  78%|███████▊  | 2335/3000 [20:20<04:48,  2.30it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.46e+6]Epoch 2335/3000:  78%|███████▊  | 2335/3000 [20:20<04:48,  2.30it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2336/3000:  78%|███████▊  | 2335/3000 
[20:20<04:48,  2.30it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2336/3000:  78%|███████▊  | 2336/3000 [20:21<04:50,  2.28it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2336/3000:  78%|███████▊  | 2336/3000 [20:21<04:50,  2.28it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2337/3000:  78%|███████▊  | 2336/3000 [20:21<04:50,  2.28it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2337/3000:  78%|███████▊  | 2337/3000 [20:21<04:55,  2.24it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2337/3000:  78%|███████▊  | 2337/3000 [20:21<04:55,  2.24it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2338/3000:  78%|███████▊  | 2337/3000 [20:21<04:55,  2.24it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2338/3000:  78%|███████▊  | 2338/3000 [20:22<04:53,  2.26it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2338/3000:  78%|███████▊  | 2338/3000 [20:22<04:53,  2.26it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6] Epoch 2339/3000:  78%|███████▊  | 2338/3000 [20:22<04:53,  2.26it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6]Epoch 2339/3000:  78%|███████▊  | 2339/3000 [20:22<05:00,  2.20it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6]Epoch 2339/3000:  78%|███████▊  | 2339/3000 [20:22<05:00,  2.20it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2340/3000:  78%|███████▊  | 2339/3000 [20:22<05:00,  2.20it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2340/3000:  78%|███████▊  | 2340/3000 [20:23<04:58,  2.21it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2340/3000:  78%|███████▊  | 2340/3000 [20:23<04:58,  2.21it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2341/3000:  78%|███████▊  | 2340/3000 [20:23<04:58,  2.21it/s, v_num=1, train_loss_step=1.49e+6, 
train_loss_epoch=1.46e+6]Epoch 2341/3000:  78%|███████▊  | 2341/3000 [20:23<04:51,  2.26it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2341/3000:  78%|███████▊  | 2341/3000 [20:23<04:51,  2.26it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2342/3000:  78%|███████▊  | 2341/3000 [20:23<04:51,  2.26it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2342/3000:  78%|███████▊  | 2342/3000 [20:24<04:39,  2.35it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2342/3000:  78%|███████▊  | 2342/3000 [20:24<04:39,  2.35it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2343/3000:  78%|███████▊  | 2342/3000 [20:24<04:39,  2.35it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2343/3000:  78%|███████▊  | 2343/3000 [20:24<04:53,  2.24it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2343/3000:  78%|███████▊  | 2343/3000 [20:24<04:53,  2.24it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.46e+6] Epoch 2344/3000:  78%|███████▊  | 2343/3000 [20:24<04:53,  2.24it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.46e+6]Epoch 2344/3000:  78%|███████▊  | 2344/3000 [20:24<04:32,  2.40it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.46e+6]Epoch 2344/3000:  78%|███████▊  | 2344/3000 [20:24<04:32,  2.40it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2345/3000:  78%|███████▊  | 2344/3000 [20:24<04:32,  2.40it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2345/3000:  78%|███████▊  | 2345/3000 [20:25<04:44,  2.31it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2345/3000:  78%|███████▊  | 2345/3000 [20:25<04:44,  2.31it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2346/3000:  78%|███████▊  | 2345/3000 [20:25<04:44,  2.31it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2346/3000:  78%|███████▊  | 2346/3000 
[20:25<04:57,  2.20it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2346/3000:  78%|███████▊  | 2346/3000 [20:25<04:57,  2.20it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2347/3000:  78%|███████▊  | 2346/3000 [20:25<04:57,  2.20it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2347/3000:  78%|███████▊  | 2347/3000 [20:26<05:00,  2.17it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2347/3000:  78%|███████▊  | 2347/3000 [20:26<05:00,  2.17it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2348/3000:  78%|███████▊  | 2347/3000 [20:26<05:00,  2.17it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2348/3000:  78%|███████▊  | 2348/3000 [20:26<04:45,  2.28it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2348/3000:  78%|███████▊  | 2348/3000 [20:26<04:45,  2.28it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2349/3000:  78%|███████▊  | 2348/3000 [20:26<04:45,  2.28it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2349/3000:  78%|███████▊  | 2349/3000 [20:27<04:41,  2.31it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2349/3000:  78%|███████▊  | 2349/3000 [20:27<04:41,  2.31it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2350/3000:  78%|███████▊  | 2349/3000 [20:27<04:41,  2.31it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2350/3000:  78%|███████▊  | 2350/3000 [20:27<04:45,  2.27it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2350/3000:  78%|███████▊  | 2350/3000 [20:27<04:45,  2.27it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.46e+6]Epoch 2351/3000:  78%|███████▊  | 2350/3000 [20:27<04:45,  2.27it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.46e+6]Epoch 2351/3000:  78%|███████▊  | 2351/3000 [20:27<04:44,  2.28it/s, v_num=1, train_loss_step=1.53e+6, 
train_loss_epoch=1.46e+6]Epoch 2351/3000:  78%|███████▊  | 2351/3000 [20:27<04:44,  2.28it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2352/3000:  78%|███████▊  | 2351/3000 [20:27<04:44,  2.28it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2352/3000:  78%|███████▊  | 2352/3000 [20:28<04:49,  2.24it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2352/3000:  78%|███████▊  | 2352/3000 [20:28<04:49,  2.24it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2353/3000:  78%|███████▊  | 2352/3000 [20:28<04:49,  2.24it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2353/3000:  78%|███████▊  | 2353/3000 [20:28<04:58,  2.16it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2353/3000:  78%|███████▊  | 2353/3000 [20:28<04:58,  2.16it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2354/3000:  78%|███████▊  | 2353/3000 [20:28<04:58,  2.16it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2354/3000:  78%|███████▊  | 2354/3000 [20:29<05:05,  2.12it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2354/3000:  78%|███████▊  | 2354/3000 [20:29<05:05,  2.12it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2355/3000:  78%|███████▊  | 2354/3000 [20:29<05:05,  2.12it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2355/3000:  78%|███████▊  | 2355/3000 [20:29<05:02,  2.13it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2355/3000:  78%|███████▊  | 2355/3000 [20:29<05:02,  2.13it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2356/3000:  78%|███████▊  | 2355/3000 [20:29<05:02,  2.13it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2356/3000:  79%|███████▊  | 2356/3000 [20:30<05:02,  2.13it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2356/3000:  79%|███████▊  | 2356/3000 
[20:30<05:02,  2.13it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2357/3000:  79%|███████▊  | 2356/3000 [20:30<05:02,  2.13it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2357/3000:  79%|███████▊  | 2357/3000 [20:30<05:06,  2.10it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.46e+6]Epoch 2357/3000:  79%|███████▊  | 2357/3000 [20:30<05:06,  2.10it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2358/3000:  79%|███████▊  | 2357/3000 [20:30<05:06,  2.10it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2358/3000:  79%|███████▊  | 2358/3000 [20:31<04:49,  2.22it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.46e+6]Epoch 2358/3000:  79%|███████▊  | 2358/3000 [20:31<04:49,  2.22it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2359/3000:  79%|███████▊  | 2358/3000 [20:31<04:49,  2.22it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2359/3000:  79%|███████▊  | 2359/3000 [20:31<04:39,  2.29it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.46e+6]Epoch 2359/3000:  79%|███████▊  | 2359/3000 [20:31<04:39,  2.29it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2360/3000:  79%|███████▊  | 2359/3000 [20:31<04:39,  2.29it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2360/3000:  79%|███████▊  | 2360/3000 [20:32<04:19,  2.47it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2360/3000:  79%|███████▊  | 2360/3000 [20:32<04:19,  2.47it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2361/3000:  79%|███████▊  | 2360/3000 [20:32<04:19,  2.47it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2361/3000:  79%|███████▊  | 2361/3000 [20:32<04:24,  2.42it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.46e+6]Epoch 2361/3000:  79%|███████▊  | 2361/3000 [20:32<04:24,  2.42it/s, v_num=1, train_loss_step=1.45e+6, 
train_loss_epoch=1.46e+6]Epoch 2362/3000:  79%|███████▊  | 2361/3000 [20:32<04:24,  2.42it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2362/3000:  79%|███████▊  | 2362/3000 [20:32<04:22,  2.43it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2362/3000:  79%|███████▊  | 2362/3000 [20:32<04:22,  2.43it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2363/3000:  79%|███████▊  | 2362/3000 [20:32<04:22,  2.43it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2363/3000:  79%|███████▉  | 2363/3000 [20:33<04:28,  2.37it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.46e+6]Epoch 2363/3000:  79%|███████▉  | 2363/3000 [20:33<04:28,  2.37it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.46e+6]Epoch 2364/3000:  79%|███████▉  | 2363/3000 [20:33<04:28,  2.37it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.46e+6]Epoch 2364/3000:  79%|███████▉  | 2364/3000 [20:33<04:35,  2.31it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.46e+6]Epoch 2364/3000:  79%|███████▉  | 2364/3000 [20:33<04:35,  2.31it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2365/3000:  79%|███████▉  | 2364/3000 [20:33<04:35,  2.31it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2365/3000:  79%|███████▉  | 2365/3000 [20:34<04:32,  2.33it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2365/3000:  79%|███████▉  | 2365/3000 [20:34<04:32,  2.33it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2366/3000:  79%|███████▉  | 2365/3000 [20:34<04:32,  2.33it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2366/3000:  79%|███████▉  | 2366/3000 [20:34<04:29,  2.36it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2366/3000:  79%|███████▉  | 2366/3000 [20:34<04:29,  2.36it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2367/3000:  79%|███████▉  | 2366/3000 
[20:34<04:29,  2.36it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2367/3000:  79%|███████▉  | 2367/3000 [20:34<04:24,  2.39it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2367/3000:  79%|███████▉  | 2367/3000 [20:34<04:24,  2.39it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2368/3000:  79%|███████▉  | 2367/3000 [20:34<04:24,  2.39it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2368/3000:  79%|███████▉  | 2368/3000 [20:35<04:38,  2.27it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2368/3000:  79%|███████▉  | 2368/3000 [20:35<04:38,  2.27it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2369/3000:  79%|███████▉  | 2368/3000 [20:35<04:38,  2.27it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2369/3000:  79%|███████▉  | 2369/3000 [20:35<04:34,  2.30it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.46e+6]Epoch 2369/3000:  79%|███████▉  | 2369/3000 [20:35<04:34,  2.30it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2370/3000:  79%|███████▉  | 2369/3000 [20:35<04:34,  2.30it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2370/3000:  79%|███████▉  | 2370/3000 [20:36<04:31,  2.32it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2370/3000:  79%|███████▉  | 2370/3000 [20:36<04:31,  2.32it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6] Epoch 2371/3000:  79%|███████▉  | 2370/3000 [20:36<04:31,  2.32it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6]Epoch 2371/3000:  79%|███████▉  | 2371/3000 [20:36<04:37,  2.27it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6]Epoch 2371/3000:  79%|███████▉  | 2371/3000 [20:36<04:37,  2.27it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2372/3000:  79%|███████▉  | 2371/3000 [20:36<04:37,  2.27it/s, v_num=1, train_loss_step=1.48e+6, 
train_loss_epoch=1.46e+6]Epoch 2372/3000:  79%|███████▉  | 2372/3000 [20:37<04:35,  2.28it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2372/3000:  79%|███████▉  | 2372/3000 [20:37<04:35,  2.28it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.46e+6] Epoch 2373/3000:  79%|███████▉  | 2372/3000 [20:37<04:35,  2.28it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.46e+6]Epoch 2373/3000:  79%|███████▉  | 2373/3000 [20:37<04:39,  2.24it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.46e+6]Epoch 2373/3000:  79%|███████▉  | 2373/3000 [20:37<04:39,  2.24it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2374/3000:  79%|███████▉  | 2373/3000 [20:37<04:39,  2.24it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2374/3000:  79%|███████▉  | 2374/3000 [20:38<04:40,  2.23it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2374/3000:  79%|███████▉  | 2374/3000 [20:38<04:40,  2.23it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2375/3000:  79%|███████▉  | 2374/3000 [20:38<04:40,  2.23it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2375/3000:  79%|███████▉  | 2375/3000 [20:38<04:39,  2.23it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.46e+6]Epoch 2375/3000:  79%|███████▉  | 2375/3000 [20:38<04:39,  2.23it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2376/3000:  79%|███████▉  | 2375/3000 [20:38<04:39,  2.23it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2376/3000:  79%|███████▉  | 2376/3000 [20:38<04:29,  2.32it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.46e+6]Epoch 2376/3000:  79%|███████▉  | 2376/3000 [20:38<04:29,  2.32it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6] Epoch 2377/3000:  79%|███████▉  | 2376/3000 [20:38<04:29,  2.32it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6]Epoch 2377/3000:  79%|███████▉  | 2377/3000 [20:39<04:23, 
 2.36it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.46e+6]Epoch 2377/3000:  79%|███████▉  | 2377/3000 [20:39<04:23,  2.36it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2378/3000:  79%|███████▉  | 2377/3000 [20:39<04:23,  2.36it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2378/3000:  79%|███████▉  | 2378/3000 [20:39<04:29,  2.31it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2378/3000:  79%|███████▉  | 2378/3000 [20:39<04:29,  2.31it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2379/3000:  79%|███████▉  | 2378/3000 [20:39<04:29,  2.31it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2379/3000:  79%|███████▉  | 2379/3000 [20:40<04:24,  2.35it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2379/3000:  79%|███████▉  | 2379/3000 [20:40<04:24,  2.35it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.45e+6]Epoch 2380/3000:  79%|███████▉  | 2379/3000 [20:40<04:24,  2.35it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.45e+6]Epoch 2380/3000:  79%|███████▉  | 2380/3000 [20:40<04:32,  2.28it/s, v_num=1, train_loss_step=1.53e+6, train_loss_epoch=1.45e+6]Epoch 2380/3000:  79%|███████▉  | 2380/3000 [20:40<04:32,  2.28it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2381/3000:  79%|███████▉  | 2380/3000 [20:40<04:32,  2.28it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2381/3000:  79%|███████▉  | 2381/3000 [20:41<04:26,  2.32it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2381/3000:  79%|███████▉  | 2381/3000 [20:41<04:26,  2.32it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.45e+6]Epoch 2382/3000:  79%|███████▉  | 2381/3000 [20:41<04:26,  2.32it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.45e+6]Epoch 2382/3000:  79%|███████▉  | 2382/3000 [20:41<04:33,  2.26it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.45e+6]Epoch 
2382/3000:  79%|███████▉  | 2382/3000 [20:41<04:33,  2.26it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2383/3000:  79%|███████▉  | 2382/3000 [20:41<04:33,  2.26it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2383/3000:  79%|███████▉  | 2383/3000 [20:42<04:50,  2.12it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2383/3000:  79%|███████▉  | 2383/3000 [20:42<04:50,  2.12it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2384/3000:  79%|███████▉  | 2383/3000 [20:42<04:50,  2.12it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2384/3000:  79%|███████▉  | 2384/3000 [20:42<04:42,  2.18it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2384/3000:  79%|███████▉  | 2384/3000 [20:42<04:42,  2.18it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2385/3000:  79%|███████▉  | 2384/3000 [20:42<04:42,  2.18it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2385/3000:  80%|███████▉  | 2385/3000 [20:43<04:43,  2.17it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2385/3000:  80%|███████▉  | 2385/3000 [20:43<04:43,  2.17it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2386/3000:  80%|███████▉  | 2385/3000 [20:43<04:43,  2.17it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2386/3000:  80%|███████▉  | 2386/3000 [20:43<04:35,  2.23it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2386/3000:  80%|███████▉  | 2386/3000 [20:43<04:35,  2.23it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2387/3000:  80%|███████▉  | 2386/3000 [20:43<04:35,  2.23it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2387/3000:  80%|███████▉  | 2387/3000 [20:43<04:37,  2.21it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2387/3000:  80%|███████▉  | 2387/3000 [20:43<04:37,  2.21it/s, v_num=1, 
train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2388/3000:  80%|███████▉  | 2387/3000 [20:43<04:37,  2.21it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2388/3000:  80%|███████▉  | 2388/3000 [20:44<04:27,  2.28it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2388/3000:  80%|███████▉  | 2388/3000 [20:44<04:27,  2.28it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2389/3000:  80%|███████▉  | 2388/3000 [20:44<04:27,  2.28it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2389/3000:  80%|███████▉  | 2389/3000 [20:44<04:15,  2.40it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2389/3000:  80%|███████▉  | 2389/3000 [20:44<04:15,  2.40it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2390/3000:  80%|███████▉  | 2389/3000 [20:44<04:15,  2.40it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2390/3000:  80%|███████▉  | 2390/3000 [20:45<04:24,  2.31it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2390/3000:  80%|███████▉  | 2390/3000 [20:45<04:24,  2.31it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2391/3000:  80%|███████▉  | 2390/3000 [20:45<04:24,  2.31it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2391/3000:  80%|███████▉  | 2391/3000 [20:45<04:31,  2.25it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2391/3000:  80%|███████▉  | 2391/3000 [20:45<04:31,  2.25it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2392/3000:  80%|███████▉  | 2391/3000 [20:45<04:31,  2.25it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2392/3000:  80%|███████▉  | 2392/3000 [20:46<04:31,  2.24it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2392/3000:  80%|███████▉  | 2392/3000 [20:46<04:31,  2.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2393/3000:  
80%|███████▉  | 2392/3000 [20:46<04:31,  2.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2393/3000:  80%|███████▉  | 2393/3000 [20:46<04:32,  2.23it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2393/3000:  80%|███████▉  | 2393/3000 [20:46<04:32,  2.23it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2394/3000:  80%|███████▉  | 2393/3000 [20:46<04:32,  2.23it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2394/3000:  80%|███████▉  | 2394/3000 [20:47<04:39,  2.17it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2394/3000:  80%|███████▉  | 2394/3000 [20:47<04:39,  2.17it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2395/3000:  80%|███████▉  | 2394/3000 [20:47<04:39,  2.17it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2395/3000:  80%|███████▉  | 2395/3000 [20:47<04:37,  2.18it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2395/3000:  80%|███████▉  | 2395/3000 [20:47<04:37,  2.18it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.45e+6]Epoch 2396/3000:  80%|███████▉  | 2395/3000 [20:47<04:37,  2.18it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.45e+6]Epoch 2396/3000:  80%|███████▉  | 2396/3000 [20:47<04:29,  2.24it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.45e+6]Epoch 2396/3000:  80%|███████▉  | 2396/3000 [20:47<04:29,  2.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2397/3000:  80%|███████▉  | 2396/3000 [20:47<04:29,  2.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2397/3000:  80%|███████▉  | 2397/3000 [20:48<04:31,  2.22it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2397/3000:  80%|███████▉  | 2397/3000 [20:48<04:31,  2.22it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2398/3000:  80%|███████▉  | 2397/3000 [20:48<04:31,  2.22it/s, v_num=1, 
train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2398/3000:  80%|███████▉  | 2398/3000 [20:48<04:25,  2.27it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2398/3000:  80%|███████▉  | 2398/3000 [20:48<04:25,  2.27it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2399/3000:  80%|███████▉  | 2398/3000 [20:48<04:25,  2.27it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2399/3000:  80%|███████▉  | 2399/3000 [20:49<04:19,  2.32it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2399/3000:  80%|███████▉  | 2399/3000 [20:49<04:19,  2.32it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2400/3000:  80%|███████▉  | 2399/3000 [20:49<04:19,  2.32it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2400/3000:  80%|████████  | 2400/3000 [20:49<04:11,  2.38it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2400/3000:  80%|████████  | 2400/3000 [20:49<04:11,  2.38it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2401/3000:  80%|████████  | 2400/3000 [20:49<04:11,  2.38it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2401/3000:  80%|████████  | 2401/3000 [20:50<04:25,  2.26it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2401/3000:  80%|████████  | 2401/3000 [20:50<04:25,  2.26it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2402/3000:  80%|████████  | 2401/3000 [20:50<04:25,  2.26it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2402/3000:  80%|████████  | 2402/3000 [20:50<04:21,  2.29it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2402/3000:  80%|████████  | 2402/3000 [20:50<04:21,  2.29it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2403/3000:  80%|████████  | 2402/3000 [20:50<04:21,  2.29it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2403/3000:  
80%|████████  | 2403/3000 [20:50<04:16,  2.33it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2403/3000:  80%|████████  | 2403/3000 [20:50<04:16,  2.33it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2404/3000:  80%|████████  | 2403/3000 [20:50<04:16,  2.33it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2404/3000:  80%|████████  | 2404/3000 [20:51<04:26,  2.24it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2404/3000:  80%|████████  | 2404/3000 [20:51<04:26,  2.24it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2405/3000:  80%|████████  | 2404/3000 [20:51<04:26,  2.24it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2405/3000:  80%|████████  | 2405/3000 [20:51<04:25,  2.24it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2405/3000:  80%|████████  | 2405/3000 [20:51<04:25,  2.24it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2406/3000:  80%|████████  | 2405/3000 [20:51<04:25,  2.24it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2406/3000:  80%|████████  | 2406/3000 [20:52<04:17,  2.31it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2406/3000:  80%|████████  | 2406/3000 [20:52<04:17,  2.31it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2407/3000:  80%|████████  | 2406/3000 [20:52<04:17,  2.31it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2407/3000:  80%|████████  | 2407/3000 [20:52<04:14,  2.33it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2407/3000:  80%|████████  | 2407/3000 [20:52<04:14,  2.33it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2408/3000:  80%|████████  | 2407/3000 [20:52<04:14,  2.33it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2408/3000:  80%|████████  | 2408/3000 [20:53<04:03,  2.43it/s, v_num=1, 
train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2408/3000:  80%|████████  | 2408/3000 [20:53<04:03,  2.43it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2409/3000:  80%|████████  | 2408/3000 [20:53<04:03,  2.43it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2409/3000:  80%|████████  | 2409/3000 [20:53<04:09,  2.37it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2409/3000:  80%|████████  | 2409/3000 [20:53<04:09,  2.37it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2410/3000:  80%|████████  | 2409/3000 [20:53<04:09,  2.37it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2410/3000:  80%|████████  | 2410/3000 [20:53<03:58,  2.48it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2410/3000:  80%|████████  | 2410/3000 [20:53<03:58,  2.48it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2411/3000:  80%|████████  | 2410/3000 [20:53<03:58,  2.48it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2411/3000:  80%|████████  | 2411/3000 [20:54<03:58,  2.47it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2411/3000:  80%|████████  | 2411/3000 [20:54<03:58,  2.47it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2412/3000:  80%|████████  | 2411/3000 [20:54<03:58,  2.47it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2412/3000:  80%|████████  | 2412/3000 [20:54<03:56,  2.48it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2412/3000:  80%|████████  | 2412/3000 [20:54<03:56,  2.48it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.45e+6]Epoch 2413/3000:  80%|████████  | 2412/3000 [20:54<03:56,  2.48it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.45e+6]Epoch 2413/3000:  80%|████████  | 2413/3000 [20:55<03:58,  2.46it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.45e+6]Epoch 2413/3000:  
80%|████████  | 2413/3000 [20:55<03:58,  2.46it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2414/3000:  80%|████████  | 2413/3000 [20:55<03:58,  2.46it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2414/3000:  80%|████████  | 2414/3000 [20:55<04:07,  2.36it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2414/3000:  80%|████████  | 2414/3000 [20:55<04:07,  2.36it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2415/3000:  80%|████████  | 2414/3000 [20:55<04:07,  2.36it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2415/3000:  80%|████████  | 2415/3000 [20:55<04:02,  2.41it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2415/3000:  80%|████████  | 2415/3000 [20:55<04:02,  2.41it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2416/3000:  80%|████████  | 2415/3000 [20:55<04:02,  2.41it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2416/3000:  81%|████████  | 2416/3000 [20:56<04:14,  2.29it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2416/3000:  81%|████████  | 2416/3000 [20:56<04:14,  2.29it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.45e+6] Epoch 2417/3000:  81%|████████  | 2416/3000 [20:56<04:14,  2.29it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.45e+6]Epoch 2417/3000:  81%|████████  | 2417/3000 [20:56<04:30,  2.15it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.45e+6]Epoch 2417/3000:  81%|████████  | 2417/3000 [20:56<04:30,  2.15it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.45e+6]Epoch 2418/3000:  81%|████████  | 2417/3000 [20:56<04:30,  2.15it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.45e+6]Epoch 2418/3000:  81%|████████  | 2418/3000 [20:57<04:32,  2.14it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.45e+6]Epoch 2418/3000:  81%|████████  | 2418/3000 [20:57<04:32,  2.14it/s, v_num=1, train_loss_step=1.45e+6, 
train_loss_epoch=1.45e+6]Epoch 2419/3000:  81%|████████  | 2418/3000 [20:57<04:32,  2.14it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2419/3000:  81%|████████  | 2419/3000 [20:57<04:28,  2.17it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2419/3000:  81%|████████  | 2419/3000 [20:57<04:28,  2.17it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2420/3000:  81%|████████  | 2419/3000 [20:57<04:28,  2.17it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2420/3000:  81%|████████  | 2420/3000 [20:58<04:12,  2.29it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2420/3000:  81%|████████  | 2420/3000 [20:58<04:12,  2.29it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2421/3000:  81%|████████  | 2420/3000 [20:58<04:12,  2.29it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2421/3000:  81%|████████  | 2421/3000 [20:58<04:21,  2.22it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2421/3000:  81%|████████  | 2421/3000 [20:58<04:21,  2.22it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2422/3000:  81%|████████  | 2421/3000 [20:58<04:21,  2.22it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2422/3000:  81%|████████  | 2422/3000 [20:59<04:11,  2.29it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2422/3000:  81%|████████  | 2422/3000 [20:59<04:11,  2.29it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.45e+6]Epoch 2423/3000:  81%|████████  | 2422/3000 [20:59<04:11,  2.29it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.45e+6]Epoch 2423/3000:  81%|████████  | 2423/3000 [20:59<04:11,  2.29it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.45e+6]Epoch 2423/3000:  81%|████████  | 2423/3000 [20:59<04:11,  2.29it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2424/3000:  81%|████████  | 2423/3000 
[20:59<04:11,  2.29it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2424/3000:  81%|████████  | 2424/3000 [20:59<04:07,  2.33it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2424/3000:  81%|████████  | 2424/3000 [20:59<04:07,  2.33it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2425/3000:  81%|████████  | 2424/3000 [20:59<04:07,  2.33it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2425/3000:  81%|████████  | 2425/3000 [21:00<04:00,  2.40it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2425/3000:  81%|████████  | 2425/3000 [21:00<04:00,  2.40it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.45e+6]Epoch 2426/3000:  81%|████████  | 2425/3000 [21:00<04:00,  2.40it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.45e+6]Epoch 2426/3000:  81%|████████  | 2426/3000 [21:00<04:14,  2.26it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.45e+6]Epoch 2426/3000:  81%|████████  | 2426/3000 [21:00<04:14,  2.26it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2427/3000:  81%|████████  | 2426/3000 [21:00<04:14,  2.26it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2427/3000:  81%|████████  | 2427/3000 [21:01<04:15,  2.24it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2427/3000:  81%|████████  | 2427/3000 [21:01<04:15,  2.24it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2428/3000:  81%|████████  | 2427/3000 [21:01<04:15,  2.24it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2428/3000:  81%|████████  | 2428/3000 [21:01<04:25,  2.15it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2428/3000:  81%|████████  | 2428/3000 [21:01<04:25,  2.15it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.45e+6] Epoch 2429/3000:  81%|████████  | 2428/3000 [21:01<04:25,  2.15it/s, v_num=1, train_loss_step=1.4e+6, 
train_loss_epoch=1.45e+6]Epoch 2429/3000:  81%|████████  | 2429/3000 [21:02<04:12,  2.26it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.45e+6]Epoch 2429/3000:  81%|████████  | 2429/3000 [21:02<04:12,  2.26it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2430/3000:  81%|████████  | 2429/3000 [21:02<04:12,  2.26it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2430/3000:  81%|████████  | 2430/3000 [21:02<04:24,  2.16it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2430/3000:  81%|████████  | 2430/3000 [21:02<04:24,  2.16it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2431/3000:  81%|████████  | 2430/3000 [21:02<04:24,  2.16it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2431/3000:  81%|████████  | 2431/3000 [21:03<04:25,  2.14it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2431/3000:  81%|████████  | 2431/3000 [21:03<04:25,  2.14it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2432/3000:  81%|████████  | 2431/3000 [21:03<04:25,  2.14it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2432/3000:  81%|████████  | 2432/3000 [21:03<04:21,  2.17it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2432/3000:  81%|████████  | 2432/3000 [21:03<04:21,  2.17it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2433/3000:  81%|████████  | 2432/3000 [21:03<04:21,  2.17it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2433/3000:  81%|████████  | 2433/3000 [21:04<04:16,  2.21it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2433/3000:  81%|████████  | 2433/3000 [21:04<04:16,  2.21it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2434/3000:  81%|████████  | 2433/3000 [21:04<04:16,  2.21it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2434/3000:  81%|████████  | 2434/3000 
[21:04<04:09,  2.27it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2434/3000:  81%|████████  | 2434/3000 [21:04<04:09,  2.27it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2435/3000:  81%|████████  | 2434/3000 [21:04<04:09,  2.27it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2435/3000:  81%|████████  | 2435/3000 [21:04<04:08,  2.27it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2435/3000:  81%|████████  | 2435/3000 [21:04<04:08,  2.27it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2436/3000:  81%|████████  | 2435/3000 [21:04<04:08,  2.27it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2436/3000:  81%|████████  | 2436/3000 [21:05<04:11,  2.24it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2436/3000:  81%|████████  | 2436/3000 [21:05<04:11,  2.24it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2437/3000:  81%|████████  | 2436/3000 [21:05<04:11,  2.24it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2437/3000:  81%|████████  | 2437/3000 [21:05<04:03,  2.31it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2437/3000:  81%|████████  | 2437/3000 [21:05<04:03,  2.31it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2438/3000:  81%|████████  | 2437/3000 [21:05<04:03,  2.31it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2438/3000:  81%|████████▏ | 2438/3000 [21:06<04:05,  2.29it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2438/3000:  81%|████████▏ | 2438/3000 [21:06<04:05,  2.29it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2439/3000:  81%|████████▏ | 2438/3000 [21:06<04:05,  2.29it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2439/3000:  81%|████████▏ | 2439/3000 [21:06<04:05,  2.29it/s, v_num=1, train_loss_step=1.42e+6, 
train_loss_epoch=1.45e+6]Epoch 2439/3000:  81%|████████▏ | 2439/3000 [21:06<04:05,  2.29it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.45e+6]Epoch 2440/3000:  81%|████████▏ | 2439/3000 [21:06<04:05,  2.29it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.45e+6]Epoch 2440/3000:  81%|████████▏ | 2440/3000 [21:07<04:08,  2.25it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.45e+6]Epoch 2440/3000:  81%|████████▏ | 2440/3000 [21:07<04:08,  2.25it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2441/3000:  81%|████████▏ | 2440/3000 [21:07<04:08,  2.25it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2441/3000:  81%|████████▏ | 2441/3000 [21:07<04:14,  2.20it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2441/3000:  81%|████████▏ | 2441/3000 [21:07<04:14,  2.20it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2442/3000:  81%|████████▏ | 2441/3000 [21:07<04:14,  2.20it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2442/3000:  81%|████████▏ | 2442/3000 [21:08<04:20,  2.14it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2442/3000:  81%|████████▏ | 2442/3000 [21:08<04:20,  2.14it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2443/3000:  81%|████████▏ | 2442/3000 [21:08<04:20,  2.14it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2443/3000:  81%|████████▏ | 2443/3000 [21:08<04:19,  2.15it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2443/3000:  81%|████████▏ | 2443/3000 [21:08<04:19,  2.15it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2444/3000:  81%|████████▏ | 2443/3000 [21:08<04:19,  2.15it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2444/3000:  81%|████████▏ | 2444/3000 [21:09<04:18,  2.15it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2444/3000:  81%|████████▏ | 2444/3000 
[21:09<04:18,  2.15it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2445/3000:  81%|████████▏ | 2444/3000 [21:09<04:18,  2.15it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2445/3000:  82%|████████▏ | 2445/3000 [21:09<04:15,  2.17it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2445/3000:  82%|████████▏ | 2445/3000 [21:09<04:15,  2.17it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2446/3000:  82%|████████▏ | 2445/3000 [21:09<04:15,  2.17it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2446/3000:  82%|████████▏ | 2446/3000 [21:09<04:05,  2.26it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2446/3000:  82%|████████▏ | 2446/3000 [21:09<04:05,  2.26it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.45e+6] Epoch 2447/3000:  82%|████████▏ | 2446/3000 [21:09<04:05,  2.26it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.45e+6]Epoch 2447/3000:  82%|████████▏ | 2447/3000 [21:10<04:00,  2.30it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.45e+6]Epoch 2447/3000:  82%|████████▏ | 2447/3000 [21:10<04:00,  2.30it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2448/3000:  82%|████████▏ | 2447/3000 [21:10<04:00,  2.30it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2448/3000:  82%|████████▏ | 2448/3000 [21:10<04:05,  2.24it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2448/3000:  82%|████████▏ | 2448/3000 [21:10<04:05,  2.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2449/3000:  82%|████████▏ | 2448/3000 [21:10<04:05,  2.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2449/3000:  82%|████████▏ | 2449/3000 [21:11<04:07,  2.23it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2449/3000:  82%|████████▏ | 2449/3000 [21:11<04:07,  2.23it/s, v_num=1, train_loss_step=1.46e+6, 
train_loss_epoch=1.45e+6]Epoch 2450/3000:  82%|████████▏ | 2449/3000 [21:11<04:07,  2.23it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2450/3000:  82%|████████▏ | 2450/3000 [21:11<04:01,  2.28it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2450/3000:  82%|████████▏ | 2450/3000 [21:11<04:01,  2.28it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2451/3000:  82%|████████▏ | 2450/3000 [21:11<04:01,  2.28it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2451/3000:  82%|████████▏ | 2451/3000 [21:12<04:20,  2.11it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2451/3000:  82%|████████▏ | 2451/3000 [21:12<04:20,  2.11it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2452/3000:  82%|████████▏ | 2451/3000 [21:12<04:20,  2.11it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2452/3000:  82%|████████▏ | 2452/3000 [21:12<04:21,  2.09it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2452/3000:  82%|████████▏ | 2452/3000 [21:12<04:21,  2.09it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2453/3000:  82%|████████▏ | 2452/3000 [21:12<04:21,  2.09it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2453/3000:  82%|████████▏ | 2453/3000 [21:13<04:10,  2.18it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2453/3000:  82%|████████▏ | 2453/3000 [21:13<04:10,  2.18it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2454/3000:  82%|████████▏ | 2453/3000 [21:13<04:10,  2.18it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2454/3000:  82%|████████▏ | 2454/3000 [21:13<04:13,  2.15it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2454/3000:  82%|████████▏ | 2454/3000 [21:13<04:13,  2.15it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2455/3000:  82%|████████▏ | 2454/3000 
[21:13<04:13,  2.15it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2455/3000:  82%|████████▏ | 2455/3000 [21:13<04:03,  2.24it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2455/3000:  82%|████████▏ | 2455/3000 [21:13<04:03,  2.24it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2456/3000:  82%|████████▏ | 2455/3000 [21:13<04:03,  2.24it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2456/3000:  82%|████████▏ | 2456/3000 [21:14<03:58,  2.28it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2456/3000:  82%|████████▏ | 2456/3000 [21:14<03:58,  2.28it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2457/3000:  82%|████████▏ | 2456/3000 [21:14<03:58,  2.28it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2457/3000:  82%|████████▏ | 2457/3000 [21:14<04:01,  2.25it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2457/3000:  82%|████████▏ | 2457/3000 [21:14<04:01,  2.25it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.45e+6]Epoch 2458/3000:  82%|████████▏ | 2457/3000 [21:14<04:01,  2.25it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.45e+6]Epoch 2458/3000:  82%|████████▏ | 2458/3000 [21:15<03:55,  2.30it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.45e+6]Epoch 2458/3000:  82%|████████▏ | 2458/3000 [21:15<03:55,  2.30it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.45e+6] Epoch 2459/3000:  82%|████████▏ | 2458/3000 [21:15<03:55,  2.30it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.45e+6]Epoch 2459/3000:  82%|████████▏ | 2459/3000 [21:15<04:05,  2.20it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.45e+6]Epoch 2459/3000:  82%|████████▏ | 2459/3000 [21:15<04:05,  2.20it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.45e+6]Epoch 2460/3000:  82%|████████▏ | 2459/3000 [21:15<04:05,  2.20it/s, v_num=1, train_loss_step=1.5e+6, 
train_loss_epoch=1.45e+6]Epoch 2460/3000:  82%|████████▏ | 2460/3000 [21:16<04:01,  2.23it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.45e+6]Epoch 2460/3000:  82%|████████▏ | 2460/3000 [21:16<04:01,  2.23it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2461/3000:  82%|████████▏ | 2460/3000 [21:16<04:01,  2.23it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2461/3000:  82%|████████▏ | 2461/3000 [21:16<03:59,  2.25it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2461/3000:  82%|████████▏ | 2461/3000 [21:16<03:59,  2.25it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2462/3000:  82%|████████▏ | 2461/3000 [21:16<03:59,  2.25it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2462/3000:  82%|████████▏ | 2462/3000 [21:17<04:11,  2.14it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2462/3000:  82%|████████▏ | 2462/3000 [21:17<04:11,  2.14it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2463/3000:  82%|████████▏ | 2462/3000 [21:17<04:11,  2.14it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2463/3000:  82%|████████▏ | 2463/3000 [21:17<04:18,  2.08it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2463/3000:  82%|████████▏ | 2463/3000 [21:17<04:18,  2.08it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2464/3000:  82%|████████▏ | 2463/3000 [21:17<04:18,  2.08it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2464/3000:  82%|████████▏ | 2464/3000 [21:18<04:14,  2.11it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2464/3000:  82%|████████▏ | 2464/3000 [21:18<04:14,  2.11it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2465/3000:  82%|████████▏ | 2464/3000 [21:18<04:14,  2.11it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2465/3000:  82%|████████▏ | 2465/3000 
[21:18<04:00,  2.23it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2465/3000:  82%|████████▏ | 2465/3000 [21:18<04:00,  2.23it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2466/3000:  82%|████████▏ | 2465/3000 [21:18<04:00,  2.23it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2466/3000:  82%|████████▏ | 2466/3000 [21:18<04:00,  2.22it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2466/3000:  82%|████████▏ | 2466/3000 [21:18<04:00,  2.22it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.45e+6]Epoch 2467/3000:  82%|████████▏ | 2466/3000 [21:18<04:00,  2.22it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.45e+6]Epoch 2467/3000:  82%|████████▏ | 2467/3000 [21:19<03:57,  2.24it/s, v_num=1, train_loss_step=1.51e+6, train_loss_epoch=1.45e+6]Epoch 2467/3000:  82%|████████▏ | 2467/3000 [21:19<03:57,  2.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2468/3000:  82%|████████▏ | 2467/3000 [21:19<03:57,  2.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2468/3000:  82%|████████▏ | 2468/3000 [21:19<03:53,  2.28it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2468/3000:  82%|████████▏ | 2468/3000 [21:19<03:53,  2.28it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2469/3000:  82%|████████▏ | 2468/3000 [21:19<03:53,  2.28it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2469/3000:  82%|████████▏ | 2469/3000 [21:20<03:44,  2.37it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2469/3000:  82%|████████▏ | 2469/3000 [21:20<03:44,  2.37it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2470/3000:  82%|████████▏ | 2469/3000 [21:20<03:44,  2.37it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2470/3000:  82%|████████▏ | 2470/3000 [21:20<03:44,  2.36it/s, v_num=1, train_loss_step=1.46e+6, 
train_loss_epoch=1.45e+6]Epoch 2470/3000:  82%|████████▏ | 2470/3000 [21:20<03:44,  2.36it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2471/3000:  82%|████████▏ | 2470/3000 [21:20<03:44,  2.36it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2471/3000:  82%|████████▏ | 2471/3000 [21:21<03:45,  2.34it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2471/3000:  82%|████████▏ | 2471/3000 [21:21<03:45,  2.34it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2472/3000:  82%|████████▏ | 2471/3000 [21:21<03:45,  2.34it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2472/3000:  82%|████████▏ | 2472/3000 [21:21<03:56,  2.23it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2472/3000:  82%|████████▏ | 2472/3000 [21:21<03:56,  2.23it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2473/3000:  82%|████████▏ | 2472/3000 [21:21<03:56,  2.23it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2473/3000:  82%|████████▏ | 2473/3000 [21:22<03:56,  2.22it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2473/3000:  82%|████████▏ | 2473/3000 [21:22<03:56,  2.22it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2474/3000:  82%|████████▏ | 2473/3000 [21:22<03:56,  2.22it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2474/3000:  82%|████████▏ | 2474/3000 [21:22<03:45,  2.33it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2474/3000:  82%|████████▏ | 2474/3000 [21:22<03:45,  2.33it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2475/3000:  82%|████████▏ | 2474/3000 [21:22<03:45,  2.33it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2475/3000:  82%|████████▎ | 2475/3000 [21:22<03:41,  2.37it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2475/3000:  82%|████████▎ | 2475/3000 
[21:22<03:41,  2.37it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2476/3000:  82%|████████▎ | 2475/3000 [21:22<03:41,  2.37it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2476/3000:  83%|████████▎ | 2476/3000 [21:23<03:42,  2.35it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2476/3000:  83%|████████▎ | 2476/3000 [21:23<03:42,  2.35it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.45e+6]Epoch 2477/3000:  83%|████████▎ | 2476/3000 [21:23<03:42,  2.35it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.45e+6]Epoch 2477/3000:  83%|████████▎ | 2477/3000 [21:23<03:44,  2.33it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.45e+6]Epoch 2477/3000:  83%|████████▎ | 2477/3000 [21:23<03:44,  2.33it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2478/3000:  83%|████████▎ | 2477/3000 [21:23<03:44,  2.33it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2478/3000:  83%|████████▎ | 2478/3000 [21:24<03:32,  2.46it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2478/3000:  83%|████████▎ | 2478/3000 [21:24<03:32,  2.46it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2479/3000:  83%|████████▎ | 2478/3000 [21:24<03:32,  2.46it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2479/3000:  83%|████████▎ | 2479/3000 [21:24<03:31,  2.46it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2479/3000:  83%|████████▎ | 2479/3000 [21:24<03:31,  2.46it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.45e+6]Epoch 2480/3000:  83%|████████▎ | 2479/3000 [21:24<03:31,  2.46it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.45e+6]Epoch 2480/3000:  83%|████████▎ | 2480/3000 [21:24<03:33,  2.43it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.45e+6]Epoch 2480/3000:  83%|████████▎ | 2480/3000 [21:24<03:33,  2.43it/s, v_num=1, train_loss_step=1.46e+6, 
train_loss_epoch=1.45e+6]Epoch 2481/3000:  83%|████████▎ | 2480/3000 [21:24<03:33,  2.43it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2481/3000:  83%|████████▎ | 2481/3000 [21:25<03:30,  2.46it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2481/3000:  83%|████████▎ | 2481/3000 [21:25<03:30,  2.46it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2482/3000:  83%|████████▎ | 2481/3000 [21:25<03:30,  2.46it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2482/3000:  83%|████████▎ | 2482/3000 [21:25<03:33,  2.42it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2482/3000:  83%|████████▎ | 2482/3000 [21:25<03:33,  2.42it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.45e+6]Epoch 2483/3000:  83%|████████▎ | 2482/3000 [21:25<03:33,  2.42it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.45e+6]Epoch 2483/3000:  83%|████████▎ | 2483/3000 [21:26<03:17,  2.61it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.45e+6]Epoch 2483/3000:  83%|████████▎ | 2483/3000 [21:26<03:17,  2.61it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2484/3000:  83%|████████▎ | 2483/3000 [21:26<03:17,  2.61it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2484/3000:  83%|████████▎ | 2484/3000 [21:26<03:27,  2.49it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2484/3000:  83%|████████▎ | 2484/3000 [21:26<03:27,  2.49it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2485/3000:  83%|████████▎ | 2484/3000 [21:26<03:27,  2.49it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2485/3000:  83%|████████▎ | 2485/3000 [21:26<03:46,  2.27it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2485/3000:  83%|████████▎ | 2485/3000 [21:26<03:46,  2.27it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2486/3000:  83%|████████▎ | 2485/3000 
[21:26<03:46,  2.27it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2486/3000:  83%|████████▎ | 2486/3000 [21:27<03:50,  2.23it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2486/3000:  83%|████████▎ | 2486/3000 [21:27<03:50,  2.23it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2487/3000:  83%|████████▎ | 2486/3000 [21:27<03:50,  2.23it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2487/3000:  83%|████████▎ | 2487/3000 [21:27<03:49,  2.23it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2487/3000:  83%|████████▎ | 2487/3000 [21:27<03:49,  2.23it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2488/3000:  83%|████████▎ | 2487/3000 [21:27<03:49,  2.23it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2488/3000:  83%|████████▎ | 2488/3000 [21:28<04:00,  2.13it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2488/3000:  83%|████████▎ | 2488/3000 [21:28<04:00,  2.13it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2489/3000:  83%|████████▎ | 2488/3000 [21:28<04:00,  2.13it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2489/3000:  83%|████████▎ | 2489/3000 [21:28<04:00,  2.12it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2489/3000:  83%|████████▎ | 2489/3000 [21:28<04:00,  2.12it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2490/3000:  83%|████████▎ | 2489/3000 [21:28<04:00,  2.12it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2490/3000:  83%|████████▎ | 2490/3000 [21:29<04:01,  2.11it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2490/3000:  83%|████████▎ | 2490/3000 [21:29<04:01,  2.11it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2491/3000:  83%|████████▎ | 2490/3000 [21:29<04:01,  2.11it/s, v_num=1, train_loss_step=1.43e+6, 
train_loss_epoch=1.45e+6]Epoch 2491/3000:  83%|████████▎ | 2491/3000 [21:29<04:02,  2.10it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2491/3000:  83%|████████▎ | 2491/3000 [21:29<04:02,  2.10it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2492/3000:  83%|████████▎ | 2491/3000 [21:29<04:02,  2.10it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2492/3000:  83%|████████▎ | 2492/3000 [21:30<03:51,  2.19it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2492/3000:  83%|████████▎ | 2492/3000 [21:30<03:51,  2.19it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2493/3000:  83%|████████▎ | 2492/3000 [21:30<03:51,  2.19it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2493/3000:  83%|████████▎ | 2493/3000 [21:30<03:53,  2.17it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2493/3000:  83%|████████▎ | 2493/3000 [21:30<03:53,  2.17it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2494/3000:  83%|████████▎ | 2493/3000 [21:30<03:53,  2.17it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2494/3000:  83%|████████▎ | 2494/3000 [21:31<03:48,  2.21it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2494/3000:  83%|████████▎ | 2494/3000 [21:31<03:48,  2.21it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2495/3000:  83%|████████▎ | 2494/3000 [21:31<03:48,  2.21it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2495/3000:  83%|████████▎ | 2495/3000 [21:31<03:45,  2.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2495/3000:  83%|████████▎ | 2495/3000 [21:31<03:45,  2.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2496/3000:  83%|████████▎ | 2495/3000 [21:31<03:45,  2.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2496/3000:  83%|████████▎ | 2496/3000 
[21:32<03:48,  2.21it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2496/3000:  83%|████████▎ | 2496/3000 [21:32<03:48,  2.21it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.45e+6]Epoch 2497/3000:  83%|████████▎ | 2496/3000 [21:32<03:48,  2.21it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.45e+6]Epoch 2497/3000:  83%|████████▎ | 2497/3000 [21:32<03:41,  2.27it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.45e+6]Epoch 2497/3000:  83%|████████▎ | 2497/3000 [21:32<03:41,  2.27it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2498/3000:  83%|████████▎ | 2497/3000 [21:32<03:41,  2.27it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2498/3000:  83%|████████▎ | 2498/3000 [21:32<03:46,  2.21it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2498/3000:  83%|████████▎ | 2498/3000 [21:32<03:46,  2.21it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2499/3000:  83%|████████▎ | 2498/3000 [21:32<03:46,  2.21it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2499/3000:  83%|████████▎ | 2499/3000 [21:33<03:56,  2.12it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2499/3000:  83%|████████▎ | 2499/3000 [21:33<03:56,  2.12it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2500/3000:  83%|████████▎ | 2499/3000 [21:33<03:56,  2.12it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2500/3000:  83%|████████▎ | 2500/3000 [21:33<03:46,  2.21it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2500/3000:  83%|████████▎ | 2500/3000 [21:33<03:46,  2.21it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2501/3000:  83%|████████▎ | 2500/3000 [21:33<03:46,  2.21it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2501/3000:  83%|████████▎ | 2501/3000 [21:34<03:31,  2.36it/s, v_num=1, train_loss_step=1.46e+6, 
train_loss_epoch=1.45e+6]Epoch 2501/3000:  83%|████████▎ | 2501/3000 [21:34<03:31,  2.36it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2502/3000:  83%|████████▎ | 2501/3000 [21:34<03:31,  2.36it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2502/3000:  83%|████████▎ | 2502/3000 [21:34<03:36,  2.30it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2502/3000:  83%|████████▎ | 2502/3000 [21:34<03:36,  2.30it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2503/3000:  83%|████████▎ | 2502/3000 [21:34<03:36,  2.30it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2503/3000:  83%|████████▎ | 2503/3000 [21:35<03:30,  2.36it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2503/3000:  83%|████████▎ | 2503/3000 [21:35<03:30,  2.36it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2504/3000:  83%|████████▎ | 2503/3000 [21:35<03:30,  2.36it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2504/3000:  83%|████████▎ | 2504/3000 [21:35<03:25,  2.41it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2504/3000:  83%|████████▎ | 2504/3000 [21:35<03:25,  2.41it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2505/3000:  83%|████████▎ | 2504/3000 [21:35<03:25,  2.41it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2505/3000:  84%|████████▎ | 2505/3000 [21:35<03:29,  2.36it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2505/3000:  84%|████████▎ | 2505/3000 [21:35<03:29,  2.36it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2506/3000:  84%|████████▎ | 2505/3000 [21:35<03:29,  2.36it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2506/3000:  84%|████████▎ | 2506/3000 [21:36<03:45,  2.19it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2506/3000:  84%|████████▎ | 2506/3000 
[21:36<03:45,  2.19it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2507/3000:  84%|████████▎ | 2506/3000 [21:36<03:45,  2.19it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2507/3000:  84%|████████▎ | 2507/3000 [21:37<04:11,  1.96it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2507/3000:  84%|████████▎ | 2507/3000 [21:37<04:11,  1.96it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2508/3000:  84%|████████▎ | 2507/3000 [21:37<04:11,  1.96it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2508/3000:  84%|████████▎ | 2508/3000 [21:37<05:01,  1.63it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2508/3000:  84%|████████▎ | 2508/3000 [21:37<05:01,  1.63it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2509/3000:  84%|████████▎ | 2508/3000 [21:37<05:01,  1.63it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2509/3000:  84%|████████▎ | 2509/3000 [21:38<05:18,  1.54it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2509/3000:  84%|████████▎ | 2509/3000 [21:38<05:18,  1.54it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2510/3000:  84%|████████▎ | 2509/3000 [21:38<05:18,  1.54it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2510/3000:  84%|████████▎ | 2510/3000 [21:39<05:08,  1.59it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2510/3000:  84%|████████▎ | 2510/3000 [21:39<05:08,  1.59it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2511/3000:  84%|████████▎ | 2510/3000 [21:39<05:08,  1.59it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2511/3000:  84%|████████▎ | 2511/3000 [21:39<05:00,  1.63it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2511/3000:  84%|████████▎ | 2511/3000 [21:39<05:00,  1.63it/s, v_num=1, train_loss_step=1.48e+6, 
train_loss_epoch=1.45e+6]Epoch 2512/3000:  84%|████████▎ | 2511/3000 [21:39<05:00,  1.63it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2512/3000:  84%|████████▎ | 2512/3000 [21:40<04:54,  1.65it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2512/3000:  84%|████████▎ | 2512/3000 [21:40<04:54,  1.65it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2513/3000:  84%|████████▎ | 2512/3000 [21:40<04:54,  1.65it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2513/3000:  84%|████████▍ | 2513/3000 [21:40<04:37,  1.75it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2513/3000:  84%|████████▍ | 2513/3000 [21:40<04:37,  1.75it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2514/3000:  84%|████████▍ | 2513/3000 [21:40<04:37,  1.75it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2514/3000:  84%|████████▍ | 2514/3000 [21:41<04:37,  1.75it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2514/3000:  84%|████████▍ | 2514/3000 [21:41<04:37,  1.75it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2515/3000:  84%|████████▍ | 2514/3000 [21:41<04:37,  1.75it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2515/3000:  84%|████████▍ | 2515/3000 [21:42<04:48,  1.68it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2515/3000:  84%|████████▍ | 2515/3000 [21:42<04:48,  1.68it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2516/3000:  84%|████████▍ | 2515/3000 [21:42<04:48,  1.68it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2516/3000:  84%|████████▍ | 2516/3000 [21:42<04:51,  1.66it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2516/3000:  84%|████████▍ | 2516/3000 [21:42<04:51,  1.66it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2517/3000:  84%|████████▍ | 2516/3000 
[21:42<04:51,  1.66it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2517/3000:  84%|████████▍ | 2517/3000 [21:43<04:50,  1.66it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2517/3000:  84%|████████▍ | 2517/3000 [21:43<04:50,  1.66it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2518/3000:  84%|████████▍ | 2517/3000 [21:43<04:50,  1.66it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2518/3000:  84%|████████▍ | 2518/3000 [21:44<04:54,  1.63it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2518/3000:  84%|████████▍ | 2518/3000 [21:44<04:54,  1.63it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2519/3000:  84%|████████▍ | 2518/3000 [21:44<04:54,  1.63it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2519/3000:  84%|████████▍ | 2519/3000 [21:44<04:48,  1.67it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2519/3000:  84%|████████▍ | 2519/3000 [21:44<04:48,  1.67it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2520/3000:  84%|████████▍ | 2519/3000 [21:44<04:48,  1.67it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2520/3000:  84%|████████▍ | 2520/3000 [21:45<04:43,  1.69it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2520/3000:  84%|████████▍ | 2520/3000 [21:45<04:43,  1.69it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.45e+6]Epoch 2521/3000:  84%|████████▍ | 2520/3000 [21:45<04:43,  1.69it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.45e+6]Epoch 2521/3000:  84%|████████▍ | 2521/3000 [21:45<04:51,  1.64it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.45e+6]Epoch 2521/3000:  84%|████████▍ | 2521/3000 [21:45<04:51,  1.64it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2522/3000:  84%|████████▍ | 2521/3000 [21:45<04:51,  1.64it/s, v_num=1, train_loss_step=1.46e+6, 
train_loss_epoch=1.45e+6]Epoch 2522/3000:  84%|████████▍ | 2522/3000 [21:46<04:45,  1.67it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2522/3000:  84%|████████▍ | 2522/3000 [21:46<04:45,  1.67it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2523/3000:  84%|████████▍ | 2522/3000 [21:46<04:45,  1.67it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2523/3000:  84%|████████▍ | 2523/3000 [21:46<04:30,  1.77it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2523/3000:  84%|████████▍ | 2523/3000 [21:46<04:30,  1.77it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.45e+6]Epoch 2524/3000:  84%|████████▍ | 2523/3000 [21:46<04:30,  1.77it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.45e+6]Epoch 2524/3000:  84%|████████▍ | 2524/3000 [21:47<04:40,  1.70it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.45e+6]Epoch 2524/3000:  84%|████████▍ | 2524/3000 [21:47<04:40,  1.70it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2525/3000:  84%|████████▍ | 2524/3000 [21:47<04:40,  1.70it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2525/3000:  84%|████████▍ | 2525/3000 [21:47<04:25,  1.79it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.45e+6]Epoch 2525/3000:  84%|████████▍ | 2525/3000 [21:47<04:25,  1.79it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2526/3000:  84%|████████▍ | 2525/3000 [21:48<04:25,  1.79it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2526/3000:  84%|████████▍ | 2526/3000 [21:48<04:22,  1.81it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.45e+6]Epoch 2526/3000:  84%|████████▍ | 2526/3000 [21:48<04:22,  1.81it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2527/3000:  84%|████████▍ | 2526/3000 [21:48<04:22,  1.81it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2527/3000:  84%|████████▍ | 2527/3000 
[21:49<04:37,  1.71it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.45e+6]Epoch 2527/3000:  84%|████████▍ | 2527/3000 [21:49<04:37,  1.71it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2528/3000:  84%|████████▍ | 2527/3000 [21:49<04:37,  1.71it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2528/3000:  84%|████████▍ | 2528/3000 [21:49<04:18,  1.83it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2528/3000:  84%|████████▍ | 2528/3000 [21:49<04:18,  1.83it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2529/3000:  84%|████████▍ | 2528/3000 [21:49<04:18,  1.83it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2529/3000:  84%|████████▍ | 2529/3000 [21:50<04:07,  1.91it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2529/3000:  84%|████████▍ | 2529/3000 [21:50<04:07,  1.91it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2530/3000:  84%|████████▍ | 2529/3000 [21:50<04:07,  1.91it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2530/3000:  84%|████████▍ | 2530/3000 [21:50<04:02,  1.94it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2530/3000:  84%|████████▍ | 2530/3000 [21:50<04:02,  1.94it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.45e+6]Epoch 2531/3000:  84%|████████▍ | 2530/3000 [21:50<04:02,  1.94it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.45e+6]Epoch 2531/3000:  84%|████████▍ | 2531/3000 [21:51<04:07,  1.89it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.45e+6]Epoch 2531/3000:  84%|████████▍ | 2531/3000 [21:51<04:07,  1.89it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2532/3000:  84%|████████▍ | 2531/3000 [21:51<04:07,  1.89it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2532/3000:  84%|████████▍ | 2532/3000 [21:51<04:16,  1.83it/s, v_num=1, train_loss_step=1.43e+6, 
train_loss_epoch=1.45e+6]Epoch 2532/3000:  84%|████████▍ | 2532/3000 [21:51<04:16,  1.83it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2533/3000:  84%|████████▍ | 2532/3000 [21:51<04:16,  1.83it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2533/3000:  84%|████████▍ | 2533/3000 [21:52<04:36,  1.69it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2533/3000:  84%|████████▍ | 2533/3000 [21:52<04:36,  1.69it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2534/3000:  84%|████████▍ | 2533/3000 [21:52<04:36,  1.69it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2534/3000:  84%|████████▍ | 2534/3000 [21:53<04:27,  1.74it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.45e+6]Epoch 2534/3000:  84%|████████▍ | 2534/3000 [21:53<04:27,  1.74it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.45e+6] Epoch 2535/3000:  84%|████████▍ | 2534/3000 [21:53<04:27,  1.74it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.45e+6]Epoch 2535/3000:  84%|████████▍ | 2535/3000 [21:53<04:26,  1.74it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.45e+6]Epoch 2535/3000:  84%|████████▍ | 2535/3000 [21:53<04:26,  1.74it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2536/3000:  84%|████████▍ | 2535/3000 [21:53<04:26,  1.74it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2536/3000:  85%|████████▍ | 2536/3000 [21:54<04:13,  1.83it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.45e+6]Epoch 2536/3000:  85%|████████▍ | 2536/3000 [21:54<04:13,  1.83it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.45e+6]Epoch 2537/3000:  85%|████████▍ | 2536/3000 [21:54<04:13,  1.83it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.45e+6]Epoch 2537/3000:  85%|████████▍ | 2537/3000 [21:54<05:02,  1.53it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.45e+6]Epoch 2537/3000:  85%|████████▍ | 2537/3000 
[21:54<05:02,  1.53it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2538/3000:  85%|████████▍ | 2537/3000 [21:54<05:02,  1.53it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2538/3000:  85%|████████▍ | 2538/3000 [21:55<04:49,  1.60it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2538/3000:  85%|████████▍ | 2538/3000 [21:55<04:49,  1.60it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.45e+6] Epoch 2539/3000:  85%|████████▍ | 2538/3000 [21:55<04:49,  1.60it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.45e+6]Epoch 2539/3000:  85%|████████▍ | 2539/3000 [21:55<04:22,  1.75it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.45e+6]Epoch 2539/3000:  85%|████████▍ | 2539/3000 [21:55<04:22,  1.75it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2540/3000:  85%|████████▍ | 2539/3000 [21:55<04:22,  1.75it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2540/3000:  85%|████████▍ | 2540/3000 [21:56<04:04,  1.88it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2540/3000:  85%|████████▍ | 2540/3000 [21:56<04:04,  1.88it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2541/3000:  85%|████████▍ | 2540/3000 [21:56<04:04,  1.88it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2541/3000:  85%|████████▍ | 2541/3000 [21:56<04:01,  1.90it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.45e+6]Epoch 2541/3000:  85%|████████▍ | 2541/3000 [21:56<04:01,  1.90it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.45e+6]Epoch 2542/3000:  85%|████████▍ | 2541/3000 [21:56<04:01,  1.90it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.45e+6]Epoch 2542/3000:  85%|████████▍ | 2542/3000 [21:57<03:46,  2.02it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.45e+6]Epoch 2542/3000:  85%|████████▍ | 2542/3000 [21:57<03:46,  2.02it/s, v_num=1, train_loss_step=1.43e+6, 
train_loss_epoch=1.45e+6]Epoch 2543/3000:  85%|████████▍ | 2542/3000 [21:57<03:46,  2.02it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2543/3000:  85%|████████▍ | 2543/3000 [21:57<03:43,  2.04it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.45e+6]Epoch 2543/3000:  85%|████████▍ | 2543/3000 [21:57<03:43,  2.04it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2544/3000:  85%|████████▍ | 2543/3000 [21:57<03:43,  2.04it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2544/3000:  85%|████████▍ | 2544/3000 [21:58<03:39,  2.08it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2544/3000:  85%|████████▍ | 2544/3000 [21:58<03:39,  2.08it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2545/3000:  85%|████████▍ | 2544/3000 [21:58<03:39,  2.08it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2545/3000:  85%|████████▍ | 2545/3000 [21:58<03:37,  2.09it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2545/3000:  85%|████████▍ | 2545/3000 [21:58<03:37,  2.09it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2546/3000:  85%|████████▍ | 2545/3000 [21:58<03:37,  2.09it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2546/3000:  85%|████████▍ | 2546/3000 [21:59<03:18,  2.29it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2546/3000:  85%|████████▍ | 2546/3000 [21:59<03:18,  2.29it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2547/3000:  85%|████████▍ | 2546/3000 [21:59<03:18,  2.29it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2547/3000:  85%|████████▍ | 2547/3000 [21:59<03:16,  2.30it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2547/3000:  85%|████████▍ | 2547/3000 [21:59<03:16,  2.30it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2548/3000:  85%|████████▍ | 2547/3000 
[21:59<03:16,  2.30it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2548/3000:  85%|████████▍ | 2548/3000 [21:59<03:20,  2.25it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2548/3000:  85%|████████▍ | 2548/3000 [21:59<03:20,  2.25it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2549/3000:  85%|████████▍ | 2548/3000 [21:59<03:20,  2.25it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2549/3000:  85%|████████▍ | 2549/3000 [22:00<03:11,  2.35it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2549/3000:  85%|████████▍ | 2549/3000 [22:00<03:11,  2.35it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6] Epoch 2550/3000:  85%|████████▍ | 2549/3000 [22:00<03:11,  2.35it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6]Epoch 2550/3000:  85%|████████▌ | 2550/3000 [22:00<03:09,  2.37it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6]Epoch 2550/3000:  85%|████████▌ | 2550/3000 [22:00<03:09,  2.37it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2551/3000:  85%|████████▌ | 2550/3000 [22:00<03:09,  2.37it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2551/3000:  85%|████████▌ | 2551/3000 [22:01<03:18,  2.26it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2551/3000:  85%|████████▌ | 2551/3000 [22:01<03:18,  2.26it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2552/3000:  85%|████████▌ | 2551/3000 [22:01<03:18,  2.26it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2552/3000:  85%|████████▌ | 2552/3000 [22:01<03:28,  2.15it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2552/3000:  85%|████████▌ | 2552/3000 [22:01<03:28,  2.15it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2553/3000:  85%|████████▌ | 2552/3000 [22:01<03:28,  2.15it/s, v_num=1, train_loss_step=1.48e+6, 
train_loss_epoch=1.44e+6]Epoch 2553/3000:  85%|████████▌ | 2553/3000 [22:02<03:34,  2.09it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2553/3000:  85%|████████▌ | 2553/3000 [22:02<03:34,  2.09it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2554/3000:  85%|████████▌ | 2553/3000 [22:02<03:34,  2.09it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2554/3000:  85%|████████▌ | 2554/3000 [22:02<03:34,  2.08it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2554/3000:  85%|████████▌ | 2554/3000 [22:02<03:34,  2.08it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2555/3000:  85%|████████▌ | 2554/3000 [22:02<03:34,  2.08it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2555/3000:  85%|████████▌ | 2555/3000 [22:03<03:27,  2.14it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2555/3000:  85%|████████▌ | 2555/3000 [22:03<03:27,  2.14it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2556/3000:  85%|████████▌ | 2555/3000 [22:03<03:27,  2.14it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2556/3000:  85%|████████▌ | 2556/3000 [22:03<03:29,  2.12it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2556/3000:  85%|████████▌ | 2556/3000 [22:03<03:29,  2.12it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2557/3000:  85%|████████▌ | 2556/3000 [22:03<03:29,  2.12it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2557/3000:  85%|████████▌ | 2557/3000 [22:04<03:19,  2.22it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2557/3000:  85%|████████▌ | 2557/3000 [22:04<03:19,  2.22it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2558/3000:  85%|████████▌ | 2557/3000 [22:04<03:19,  2.22it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2558/3000:  85%|████████▌ | 2558/3000 
[22:04<03:03,  2.41it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2558/3000:  85%|████████▌ | 2558/3000 [22:04<03:03,  2.41it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2559/3000:  85%|████████▌ | 2558/3000 [22:04<03:03,  2.41it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2559/3000:  85%|████████▌ | 2559/3000 [22:04<03:17,  2.24it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2559/3000:  85%|████████▌ | 2559/3000 [22:04<03:17,  2.24it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2560/3000:  85%|████████▌ | 2559/3000 [22:04<03:17,  2.24it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2560/3000:  85%|████████▌ | 2560/3000 [22:05<03:03,  2.39it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2560/3000:  85%|████████▌ | 2560/3000 [22:05<03:03,  2.39it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2561/3000:  85%|████████▌ | 2560/3000 [22:05<03:03,  2.39it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2561/3000:  85%|████████▌ | 2561/3000 [22:05<02:59,  2.45it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2561/3000:  85%|████████▌ | 2561/3000 [22:05<02:59,  2.45it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2562/3000:  85%|████████▌ | 2561/3000 [22:05<02:59,  2.45it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2562/3000:  85%|████████▌ | 2562/3000 [22:06<03:10,  2.30it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2562/3000:  85%|████████▌ | 2562/3000 [22:06<03:10,  2.30it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2563/3000:  85%|████████▌ | 2562/3000 [22:06<03:10,  2.30it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2563/3000:  85%|████████▌ | 2563/3000 [22:06<03:12,  2.27it/s, v_num=1, train_loss_step=1.41e+6, 
train_loss_epoch=1.44e+6]Epoch 2563/3000:  85%|████████▌ | 2563/3000 [22:06<03:12,  2.27it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2564/3000:  85%|████████▌ | 2563/3000 [22:06<03:12,  2.27it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2564/3000:  85%|████████▌ | 2564/3000 [22:07<03:11,  2.28it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2564/3000:  85%|████████▌ | 2564/3000 [22:07<03:11,  2.28it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2565/3000:  85%|████████▌ | 2564/3000 [22:07<03:11,  2.28it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2565/3000:  86%|████████▌ | 2565/3000 [22:07<03:03,  2.37it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2565/3000:  86%|████████▌ | 2565/3000 [22:07<03:03,  2.37it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2566/3000:  86%|████████▌ | 2565/3000 [22:07<03:03,  2.37it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2566/3000:  86%|████████▌ | 2566/3000 [22:07<02:58,  2.44it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2566/3000:  86%|████████▌ | 2566/3000 [22:07<02:58,  2.44it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2567/3000:  86%|████████▌ | 2566/3000 [22:07<02:58,  2.44it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2567/3000:  86%|████████▌ | 2567/3000 [22:08<03:01,  2.38it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2567/3000:  86%|████████▌ | 2567/3000 [22:08<03:01,  2.38it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2568/3000:  86%|████████▌ | 2567/3000 [22:08<03:01,  2.38it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2568/3000:  86%|████████▌ | 2568/3000 [22:08<03:06,  2.32it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2568/3000:  86%|████████▌ | 2568/3000 
[22:08<03:06,  2.32it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2569/3000:  86%|████████▌ | 2568/3000 [22:08<03:06,  2.32it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2569/3000:  86%|████████▌ | 2569/3000 [22:09<02:53,  2.48it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2569/3000:  86%|████████▌ | 2569/3000 [22:09<02:53,  2.48it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2570/3000:  86%|████████▌ | 2569/3000 [22:09<02:53,  2.48it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2570/3000:  86%|████████▌ | 2570/3000 [22:09<03:01,  2.37it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2570/3000:  86%|████████▌ | 2570/3000 [22:09<03:01,  2.37it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2571/3000:  86%|████████▌ | 2570/3000 [22:09<03:01,  2.37it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2571/3000:  86%|████████▌ | 2571/3000 [22:09<02:57,  2.42it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2571/3000:  86%|████████▌ | 2571/3000 [22:09<02:57,  2.42it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2572/3000:  86%|████████▌ | 2571/3000 [22:09<02:57,  2.42it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2572/3000:  86%|████████▌ | 2572/3000 [22:10<03:04,  2.32it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2572/3000:  86%|████████▌ | 2572/3000 [22:10<03:04,  2.32it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2573/3000:  86%|████████▌ | 2572/3000 [22:10<03:04,  2.32it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2573/3000:  86%|████████▌ | 2573/3000 [22:10<03:09,  2.25it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2573/3000:  86%|████████▌ | 2573/3000 [22:10<03:09,  2.25it/s, v_num=1, train_loss_step=1.43e+6, 
train_loss_epoch=1.44e+6]Epoch 2574/3000:  86%|████████▌ | 2573/3000 [22:10<03:09,  2.25it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2574/3000:  86%|████████▌ | 2574/3000 [22:11<03:00,  2.36it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2574/3000:  86%|████████▌ | 2574/3000 [22:11<03:00,  2.36it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2575/3000:  86%|████████▌ | 2574/3000 [22:11<03:00,  2.36it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2575/3000:  86%|████████▌ | 2575/3000 [22:11<03:06,  2.27it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2575/3000:  86%|████████▌ | 2575/3000 [22:11<03:06,  2.27it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2576/3000:  86%|████████▌ | 2575/3000 [22:11<03:06,  2.27it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2576/3000:  86%|████████▌ | 2576/3000 [22:12<03:05,  2.29it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2576/3000:  86%|████████▌ | 2576/3000 [22:12<03:05,  2.29it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6] Epoch 2577/3000:  86%|████████▌ | 2576/3000 [22:12<03:05,  2.29it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6]Epoch 2577/3000:  86%|████████▌ | 2577/3000 [22:12<02:57,  2.38it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6]Epoch 2577/3000:  86%|████████▌ | 2577/3000 [22:12<02:57,  2.38it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2578/3000:  86%|████████▌ | 2577/3000 [22:12<02:57,  2.38it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2578/3000:  86%|████████▌ | 2578/3000 [22:12<02:57,  2.38it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2578/3000:  86%|████████▌ | 2578/3000 [22:12<02:57,  2.38it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2579/3000:  86%|████████▌ | 2578/3000 
[22:12<02:57,  2.38it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2579/3000:  86%|████████▌ | 2579/3000 [22:13<03:02,  2.31it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2579/3000:  86%|████████▌ | 2579/3000 [22:13<03:02,  2.31it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2580/3000:  86%|████████▌ | 2579/3000 [22:13<03:02,  2.31it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2580/3000:  86%|████████▌ | 2580/3000 [22:13<02:59,  2.34it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2580/3000:  86%|████████▌ | 2580/3000 [22:13<02:59,  2.34it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2581/3000:  86%|████████▌ | 2580/3000 [22:13<02:59,  2.34it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2581/3000:  86%|████████▌ | 2581/3000 [22:14<03:01,  2.31it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2581/3000:  86%|████████▌ | 2581/3000 [22:14<03:01,  2.31it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6] Epoch 2582/3000:  86%|████████▌ | 2581/3000 [22:14<03:01,  2.31it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6]Epoch 2582/3000:  86%|████████▌ | 2582/3000 [22:14<03:08,  2.21it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6]Epoch 2582/3000:  86%|████████▌ | 2582/3000 [22:14<03:08,  2.21it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2583/3000:  86%|████████▌ | 2582/3000 [22:14<03:08,  2.21it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2583/3000:  86%|████████▌ | 2583/3000 [22:15<03:15,  2.13it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2583/3000:  86%|████████▌ | 2583/3000 [22:15<03:15,  2.13it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2584/3000:  86%|████████▌ | 2583/3000 [22:15<03:15,  2.13it/s, v_num=1, train_loss_step=1.44e+6, 
train_loss_epoch=1.44e+6]Epoch 2584/3000:  86%|████████▌ | 2584/3000 [22:15<03:14,  2.13it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2584/3000:  86%|████████▌ | 2584/3000 [22:15<03:14,  2.13it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2585/3000:  86%|████████▌ | 2584/3000 [22:15<03:14,  2.13it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2585/3000:  86%|████████▌ | 2585/3000 [22:16<03:08,  2.20it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2585/3000:  86%|████████▌ | 2585/3000 [22:16<03:08,  2.20it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2586/3000:  86%|████████▌ | 2585/3000 [22:16<03:08,  2.20it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2586/3000:  86%|████████▌ | 2586/3000 [22:16<03:03,  2.25it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2586/3000:  86%|████████▌ | 2586/3000 [22:16<03:03,  2.25it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2587/3000:  86%|████████▌ | 2586/3000 [22:16<03:03,  2.25it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2587/3000:  86%|████████▌ | 2587/3000 [22:17<03:06,  2.22it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2587/3000:  86%|████████▌ | 2587/3000 [22:17<03:06,  2.22it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2588/3000:  86%|████████▌ | 2587/3000 [22:17<03:06,  2.22it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2588/3000:  86%|████████▋ | 2588/3000 [22:17<03:06,  2.21it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2588/3000:  86%|████████▋ | 2588/3000 [22:17<03:06,  2.21it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2589/3000:  86%|████████▋ | 2588/3000 [22:17<03:06,  2.21it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2589/3000:  86%|████████▋ | 2589/3000 
[22:17<02:57,  2.31it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2589/3000:  86%|████████▋ | 2589/3000 [22:17<02:57,  2.31it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2590/3000:  86%|████████▋ | 2589/3000 [22:17<02:57,  2.31it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2590/3000:  86%|████████▋ | 2590/3000 [22:18<02:55,  2.33it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2590/3000:  86%|████████▋ | 2590/3000 [22:18<02:55,  2.33it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2591/3000:  86%|████████▋ | 2590/3000 [22:18<02:55,  2.33it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2591/3000:  86%|████████▋ | 2591/3000 [22:18<02:59,  2.27it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2591/3000:  86%|████████▋ | 2591/3000 [22:18<02:59,  2.27it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2592/3000:  86%|████████▋ | 2591/3000 [22:18<02:59,  2.27it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2592/3000:  86%|████████▋ | 2592/3000 [22:19<02:57,  2.30it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2592/3000:  86%|████████▋ | 2592/3000 [22:19<02:57,  2.30it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.44e+6] Epoch 2593/3000:  86%|████████▋ | 2592/3000 [22:19<02:57,  2.30it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.44e+6]Epoch 2593/3000:  86%|████████▋ | 2593/3000 [22:19<02:58,  2.28it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.44e+6]Epoch 2593/3000:  86%|████████▋ | 2593/3000 [22:19<02:58,  2.28it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2594/3000:  86%|████████▋ | 2593/3000 [22:19<02:58,  2.28it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2594/3000:  86%|████████▋ | 2594/3000 [22:20<02:52,  2.35it/s, v_num=1, train_loss_step=1.44e+6, 
train_loss_epoch=1.44e+6]Epoch 2594/3000:  86%|████████▋ | 2594/3000 [22:20<02:52,  2.35it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2595/3000:  86%|████████▋ | 2594/3000 [22:20<02:52,  2.35it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2595/3000:  86%|████████▋ | 2595/3000 [22:20<02:56,  2.30it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2595/3000:  86%|████████▋ | 2595/3000 [22:20<02:56,  2.30it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2596/3000:  86%|████████▋ | 2595/3000 [22:20<02:56,  2.30it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2596/3000:  87%|████████▋ | 2596/3000 [22:20<02:56,  2.29it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2596/3000:  87%|████████▋ | 2596/3000 [22:20<02:56,  2.29it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2597/3000:  87%|████████▋ | 2596/3000 [22:20<02:56,  2.29it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2597/3000:  87%|████████▋ | 2597/3000 [39:41<34:58:55, 312.50s/it, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2597/3000:  87%|████████▋ | 2597/3000 [39:41<34:58:55, 312.50s/it, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2598/3000:  87%|████████▋ | 2597/3000 [39:41<34:58:55, 312.50s/it, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2598/3000:  87%|████████▋ | 2598/3000 [39:42<24:26:31, 218.88s/it, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2598/3000:  87%|████████▋ | 2598/3000 [39:42<24:26:31, 218.88s/it, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2599/3000:  87%|████████▋ | 2598/3000 [39:42<24:26:31, 218.88s/it, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2599/3000:  87%|████████▋ | 2599/3000 [39:42<17:04:58, 153.36s/it, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2599/3000:  
87%|████████▋ | 2599/3000 [39:42<17:04:58, 153.36s/it, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2600/3000:  87%|████████▋ | 2599/3000 [39:42<17:04:58, 153.36s/it, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2600/3000:  87%|████████▋ | 2600/3000 [39:43<11:56:39, 107.50s/it, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2600/3000:  87%|████████▋ | 2600/3000 [39:43<11:56:39, 107.50s/it, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.44e+6] Epoch 2601/3000:  87%|████████▋ | 2600/3000 [39:43<11:56:39, 107.50s/it, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.44e+6]Epoch 2601/3000:  87%|████████▋ | 2601/3000 [39:43<8:21:22, 75.39s/it, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.44e+6]  Epoch 2601/3000:  87%|████████▋ | 2601/3000 [39:43<8:21:22, 75.39s/it, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2602/3000:  87%|████████▋ | 2601/3000 [39:43<8:21:22, 75.39s/it, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2602/3000:  87%|████████▋ | 2602/3000 [39:43<5:50:58, 52.91s/it, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2602/3000:  87%|████████▋ | 2602/3000 [39:43<5:50:58, 52.91s/it, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2603/3000:  87%|████████▋ | 2602/3000 [39:43<5:50:58, 52.91s/it, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2603/3000:  87%|████████▋ | 2603/3000 [39:44<4:05:52, 37.16s/it, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2603/3000:  87%|████████▋ | 2603/3000 [39:44<4:05:52, 37.16s/it, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2604/3000:  87%|████████▋ | 2603/3000 [39:44<4:05:52, 37.16s/it, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2604/3000:  87%|████████▋ | 2604/3000 [39:44<2:52:20, 26.11s/it, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2604/3000:  87%|████████▋ | 2604/3000 
[39:44<2:52:20, 26.11s/it, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2605/3000:  87%|████████▋ | 2604/3000 [39:44<2:52:20, 26.11s/it, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2605/3000:  87%|████████▋ | 2605/3000 [39:45<2:01:08, 18.40s/it, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2605/3000:  87%|████████▋ | 2605/3000 [39:45<2:01:08, 18.40s/it, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2606/3000:  87%|████████▋ | 2605/3000 [39:45<2:01:08, 18.40s/it, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2606/3000:  87%|████████▋ | 2606/3000 [39:45<1:25:28, 13.02s/it, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2606/3000:  87%|████████▋ | 2606/3000 [39:45<1:25:28, 13.02s/it, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2607/3000:  87%|████████▋ | 2606/3000 [39:45<1:25:28, 13.02s/it, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2607/3000:  87%|████████▋ | 2607/3000 [39:45<1:00:32,  9.24s/it, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2607/3000:  87%|████████▋ | 2607/3000 [39:46<1:00:32,  9.24s/it, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2608/3000:  87%|████████▋ | 2607/3000 [39:46<1:00:32,  9.24s/it, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2608/3000:  87%|████████▋ | 2608/3000 [39:46<43:07,  6.60s/it, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]  Epoch 2608/3000:  87%|████████▋ | 2608/3000 [39:46<43:07,  6.60s/it, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2609/3000:  87%|████████▋ | 2608/3000 [39:46<43:07,  6.60s/it, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2609/3000:  87%|████████▋ | 2609/3000 [39:46<30:58,  4.75s/it, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2609/3000:  87%|████████▋ | 2609/3000 [39:46<30:58,  4.75s/it, v_num=1, 
train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2610/3000:  87%|████████▋ | 2609/3000 [39:46<30:58,  4.75s/it, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2610/3000:  87%|████████▋ | 2610/3000 [39:47<22:34,  3.47s/it, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2610/3000:  87%|████████▋ | 2610/3000 [39:47<22:34,  3.47s/it, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2611/3000:  87%|████████▋ | 2610/3000 [39:47<22:34,  3.47s/it, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2611/3000:  87%|████████▋ | 2611/3000 [39:47<16:30,  2.55s/it, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2611/3000:  87%|████████▋ | 2611/3000 [39:47<16:30,  2.55s/it, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2612/3000:  87%|████████▋ | 2611/3000 [39:47<16:30,  2.55s/it, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2612/3000:  87%|████████▋ | 2612/3000 [39:48<12:33,  1.94s/it, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2612/3000:  87%|████████▋ | 2612/3000 [39:48<12:33,  1.94s/it, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2613/3000:  87%|████████▋ | 2612/3000 [39:48<12:33,  1.94s/it, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2613/3000:  87%|████████▋ | 2613/3000 [39:48<09:35,  1.49s/it, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2613/3000:  87%|████████▋ | 2613/3000 [39:48<09:35,  1.49s/it, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2614/3000:  87%|████████▋ | 2613/3000 [39:48<09:35,  1.49s/it, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2614/3000:  87%|████████▋ | 2614/3000 [39:49<07:32,  1.17s/it, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2614/3000:  87%|████████▋ | 2614/3000 [39:49<07:32,  1.17s/it, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2615/3000:  
87%|████████▋ | 2614/3000 [39:49<07:32,  1.17s/it, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2615/3000:  87%|████████▋ | 2615/3000 [39:49<06:05,  1.05it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2615/3000:  87%|████████▋ | 2615/3000 [39:49<06:05,  1.05it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.44e+6] Epoch 2616/3000:  87%|████████▋ | 2615/3000 [39:49<06:05,  1.05it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.44e+6]Epoch 2616/3000:  87%|████████▋ | 2616/3000 [39:50<05:09,  1.24it/s, v_num=1, train_loss_step=1.5e+6, train_loss_epoch=1.44e+6]Epoch 2616/3000:  87%|████████▋ | 2616/3000 [39:50<05:09,  1.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2617/3000:  87%|████████▋ | 2616/3000 [39:50<05:09,  1.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2617/3000:  87%|████████▋ | 2617/3000 [39:50<04:24,  1.45it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2617/3000:  87%|████████▋ | 2617/3000 [39:50<04:24,  1.45it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2618/3000:  87%|████████▋ | 2617/3000 [39:50<04:24,  1.45it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2618/3000:  87%|████████▋ | 2618/3000 [39:50<03:50,  1.66it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2618/3000:  87%|████████▋ | 2618/3000 [39:50<03:50,  1.66it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2619/3000:  87%|████████▋ | 2618/3000 [39:50<03:50,  1.66it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2619/3000:  87%|████████▋ | 2619/3000 [39:51<03:35,  1.77it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2619/3000:  87%|████████▋ | 2619/3000 [39:51<03:35,  1.77it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2620/3000:  87%|████████▋ | 2619/3000 [39:51<03:35,  1.77it/s, v_num=1, 
train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2620/3000:  87%|████████▋ | 2620/3000 [39:51<03:21,  1.89it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2620/3000:  87%|████████▋ | 2620/3000 [39:51<03:21,  1.89it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2621/3000:  87%|████████▋ | 2620/3000 [39:51<03:21,  1.89it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2621/3000:  87%|████████▋ | 2621/3000 [39:52<03:11,  1.98it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2621/3000:  87%|████████▋ | 2621/3000 [39:52<03:11,  1.98it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2622/3000:  87%|████████▋ | 2621/3000 [39:52<03:11,  1.98it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2622/3000:  87%|████████▋ | 2622/3000 [39:52<02:56,  2.15it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2622/3000:  87%|████████▋ | 2622/3000 [39:52<02:56,  2.15it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2623/3000:  87%|████████▋ | 2622/3000 [39:52<02:56,  2.15it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2623/3000:  87%|████████▋ | 2623/3000 [39:52<02:46,  2.27it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2623/3000:  87%|████████▋ | 2623/3000 [39:52<02:46,  2.27it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2624/3000:  87%|████████▋ | 2623/3000 [39:52<02:46,  2.27it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2624/3000:  87%|████████▋ | 2624/3000 [39:53<02:51,  2.20it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2624/3000:  87%|████████▋ | 2624/3000 [39:53<02:51,  2.20it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2625/3000:  87%|████████▋ | 2624/3000 [39:53<02:51,  2.20it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2625/3000:  
88%|████████▊ | 2625/3000 [39:53<02:49,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2625/3000:  88%|████████▊ | 2625/3000 [39:53<02:49,  2.22it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2626/3000:  88%|████████▊ | 2625/3000 [39:53<02:49,  2.22it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2626/3000:  88%|████████▊ | 2626/3000 [39:54<02:45,  2.26it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2626/3000:  88%|████████▊ | 2626/3000 [39:54<02:45,  2.26it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2627/3000:  88%|████████▊ | 2626/3000 [39:54<02:45,  2.26it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2627/3000:  88%|████████▊ | 2627/3000 [39:54<02:41,  2.32it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2627/3000:  88%|████████▊ | 2627/3000 [39:54<02:41,  2.32it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2628/3000:  88%|████████▊ | 2627/3000 [39:54<02:41,  2.32it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2628/3000:  88%|████████▊ | 2628/3000 [39:55<02:39,  2.33it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2628/3000:  88%|████████▊ | 2628/3000 [39:55<02:39,  2.33it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2629/3000:  88%|████████▊ | 2628/3000 [39:55<02:39,  2.33it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2629/3000:  88%|████████▊ | 2629/3000 [39:55<02:44,  2.26it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2629/3000:  88%|████████▊ | 2629/3000 [39:55<02:44,  2.26it/s, v_num=1, train_loss_step=1.38e+6, train_loss_epoch=1.44e+6]Epoch 2630/3000:  88%|████████▊ | 2629/3000 [39:55<02:44,  2.26it/s, v_num=1, train_loss_step=1.38e+6, train_loss_epoch=1.44e+6]Epoch 2630/3000:  88%|████████▊ | 2630/3000 [39:56<02:44,  2.24it/s, v_num=1, 
train_loss_step=1.38e+6, train_loss_epoch=1.44e+6]Epoch 2630/3000:  88%|████████▊ | 2630/3000 [39:56<02:44,  2.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2631/3000:  88%|████████▊ | 2630/3000 [39:56<02:44,  2.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2631/3000:  88%|████████▊ | 2631/3000 [39:56<02:33,  2.41it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2631/3000:  88%|████████▊ | 2631/3000 [39:56<02:33,  2.41it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2632/3000:  88%|████████▊ | 2631/3000 [39:56<02:33,  2.41it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2632/3000:  88%|████████▊ | 2632/3000 [39:56<02:27,  2.49it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2632/3000:  88%|████████▊ | 2632/3000 [39:56<02:27,  2.49it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2633/3000:  88%|████████▊ | 2632/3000 [39:56<02:27,  2.49it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2633/3000:  88%|████████▊ | 2633/3000 [39:57<02:33,  2.40it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2633/3000:  88%|████████▊ | 2633/3000 [39:57<02:33,  2.40it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2634/3000:  88%|████████▊ | 2633/3000 [39:57<02:33,  2.40it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2634/3000:  88%|████████▊ | 2634/3000 [39:57<02:36,  2.34it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2634/3000:  88%|████████▊ | 2634/3000 [39:57<02:36,  2.34it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2635/3000:  88%|████████▊ | 2634/3000 [39:57<02:36,  2.34it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2635/3000:  88%|████████▊ | 2635/3000 [39:58<02:34,  2.37it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2635/3000:  
88%|████████▊ | 2635/3000 [39:58<02:34,  2.37it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2636/3000:  88%|████████▊ | 2635/3000 [39:58<02:34,  2.37it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2636/3000:  88%|████████▊ | 2636/3000 [39:58<02:33,  2.37it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2636/3000:  88%|████████▊ | 2636/3000 [39:58<02:33,  2.37it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.44e+6]Epoch 2637/3000:  88%|████████▊ | 2636/3000 [39:58<02:33,  2.37it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.44e+6]Epoch 2637/3000:  88%|████████▊ | 2637/3000 [39:58<02:35,  2.33it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.44e+6]Epoch 2637/3000:  88%|████████▊ | 2637/3000 [39:58<02:35,  2.33it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2638/3000:  88%|████████▊ | 2637/3000 [39:59<02:35,  2.33it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2638/3000:  88%|████████▊ | 2638/3000 [39:59<02:41,  2.24it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2638/3000:  88%|████████▊ | 2638/3000 [39:59<02:41,  2.24it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2639/3000:  88%|████████▊ | 2638/3000 [39:59<02:41,  2.24it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2639/3000:  88%|████████▊ | 2639/3000 [39:59<02:40,  2.25it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2639/3000:  88%|████████▊ | 2639/3000 [39:59<02:40,  2.25it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2640/3000:  88%|████████▊ | 2639/3000 [39:59<02:40,  2.25it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2640/3000:  88%|████████▊ | 2640/3000 [40:00<02:43,  2.20it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2640/3000:  88%|████████▊ | 2640/3000 [40:00<02:43,  2.20it/s, v_num=1, 
train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2641/3000:  88%|████████▊ | 2640/3000 [40:00<02:43,  2.20it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2641/3000:  88%|████████▊ | 2641/3000 [40:00<02:44,  2.19it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2641/3000:  88%|████████▊ | 2641/3000 [40:00<02:44,  2.19it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2642/3000:  88%|████████▊ | 2641/3000 [40:00<02:44,  2.19it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2642/3000:  88%|████████▊ | 2642/3000 [40:01<02:45,  2.17it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2642/3000:  88%|████████▊ | 2642/3000 [40:01<02:45,  2.17it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2643/3000:  88%|████████▊ | 2642/3000 [40:01<02:45,  2.17it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2643/3000:  88%|████████▊ | 2643/3000 [40:01<02:46,  2.15it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2643/3000:  88%|████████▊ | 2643/3000 [40:01<02:46,  2.15it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2644/3000:  88%|████████▊ | 2643/3000 [40:01<02:46,  2.15it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2644/3000:  88%|████████▊ | 2644/3000 [40:02<02:50,  2.09it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2644/3000:  88%|████████▊ | 2644/3000 [40:02<02:50,  2.09it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.44e+6]Epoch 2645/3000:  88%|████████▊ | 2644/3000 [40:02<02:50,  2.09it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.44e+6]Epoch 2645/3000:  88%|████████▊ | 2645/3000 [40:02<02:45,  2.14it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.44e+6]Epoch 2645/3000:  88%|████████▊ | 2645/3000 [40:02<02:45,  2.14it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2646/3000:  
88%|████████▊ | 2645/3000 [40:02<02:45,  2.14it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2646/3000:  88%|████████▊ | 2646/3000 [40:03<02:46,  2.13it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2646/3000:  88%|████████▊ | 2646/3000 [40:03<02:46,  2.13it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2647/3000:  88%|████████▊ | 2646/3000 [40:03<02:46,  2.13it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2647/3000:  88%|████████▊ | 2647/3000 [40:03<02:43,  2.16it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2647/3000:  88%|████████▊ | 2647/3000 [40:03<02:43,  2.16it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2648/3000:  88%|████████▊ | 2647/3000 [40:03<02:43,  2.16it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2648/3000:  88%|████████▊ | 2648/3000 [40:04<02:43,  2.16it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2648/3000:  88%|████████▊ | 2648/3000 [40:04<02:43,  2.16it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2649/3000:  88%|████████▊ | 2648/3000 [40:04<02:43,  2.16it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2649/3000:  88%|████████▊ | 2649/3000 [40:04<02:33,  2.28it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2649/3000:  88%|████████▊ | 2649/3000 [40:04<02:33,  2.28it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2650/3000:  88%|████████▊ | 2649/3000 [40:04<02:33,  2.28it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2650/3000:  88%|████████▊ | 2650/3000 [40:04<02:33,  2.28it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2650/3000:  88%|████████▊ | 2650/3000 [40:04<02:33,  2.28it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2651/3000:  88%|████████▊ | 2650/3000 [40:04<02:33,  2.28it/s, v_num=1, 
train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2651/3000:  88%|████████▊ | 2651/3000 [40:05<02:32,  2.28it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2651/3000:  88%|████████▊ | 2651/3000 [40:05<02:32,  2.28it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2652/3000:  88%|████████▊ | 2651/3000 [40:05<02:32,  2.28it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2652/3000:  88%|████████▊ | 2652/3000 [40:05<02:32,  2.28it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2652/3000:  88%|████████▊ | 2652/3000 [40:05<02:32,  2.28it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2653/3000:  88%|████████▊ | 2652/3000 [40:05<02:32,  2.28it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2653/3000:  88%|████████▊ | 2653/3000 [40:06<02:28,  2.34it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2653/3000:  88%|████████▊ | 2653/3000 [40:06<02:28,  2.34it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2654/3000:  88%|████████▊ | 2653/3000 [40:06<02:28,  2.34it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2654/3000:  88%|████████▊ | 2654/3000 [40:06<02:28,  2.32it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2654/3000:  88%|████████▊ | 2654/3000 [40:06<02:28,  2.32it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2655/3000:  88%|████████▊ | 2654/3000 [40:06<02:28,  2.32it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2655/3000:  88%|████████▊ | 2655/3000 [40:07<02:31,  2.27it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2655/3000:  88%|████████▊ | 2655/3000 [40:07<02:31,  2.27it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2656/3000:  88%|████████▊ | 2655/3000 [40:07<02:31,  2.27it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2656/3000:  
89%|████████▊ | 2656/3000 [40:07<02:27,  2.33it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2656/3000:  89%|████████▊ | 2656/3000 [40:07<02:27,  2.33it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2657/3000:  89%|████████▊ | 2656/3000 [40:07<02:27,  2.33it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2657/3000:  89%|████████▊ | 2657/3000 [40:07<02:25,  2.35it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2657/3000:  89%|████████▊ | 2657/3000 [40:07<02:25,  2.35it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2658/3000:  89%|████████▊ | 2657/3000 [40:07<02:25,  2.35it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2658/3000:  89%|████████▊ | 2658/3000 [40:08<02:24,  2.37it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2658/3000:  89%|████████▊ | 2658/3000 [40:08<02:24,  2.37it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2659/3000:  89%|████████▊ | 2658/3000 [40:08<02:24,  2.37it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2659/3000:  89%|████████▊ | 2659/3000 [40:08<02:23,  2.38it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2659/3000:  89%|████████▊ | 2659/3000 [40:08<02:23,  2.38it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2660/3000:  89%|████████▊ | 2659/3000 [40:08<02:23,  2.38it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2660/3000:  89%|████████▊ | 2660/3000 [40:09<02:30,  2.27it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2660/3000:  89%|████████▊ | 2660/3000 [40:09<02:30,  2.27it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.44e+6]Epoch 2661/3000:  89%|████████▊ | 2660/3000 [40:09<02:30,  2.27it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.44e+6]Epoch 2661/3000:  89%|████████▊ | 2661/3000 [40:09<02:28,  2.29it/s, v_num=1, 
train_loss_step=1.39e+6, train_loss_epoch=1.44e+6]Epoch 2661/3000:  89%|████████▊ | 2661/3000 [40:09<02:28,  2.29it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2662/3000:  89%|████████▊ | 2661/3000 [40:09<02:28,  2.29it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2662/3000:  89%|████████▊ | 2662/3000 [40:09<02:13,  2.54it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2662/3000:  89%|████████▊ | 2662/3000 [40:10<02:13,  2.54it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2663/3000:  89%|████████▊ | 2662/3000 [40:10<02:13,  2.54it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2663/3000:  89%|████████▉ | 2663/3000 [40:10<02:15,  2.49it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2663/3000:  89%|████████▉ | 2663/3000 [40:10<02:15,  2.49it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2664/3000:  89%|████████▉ | 2663/3000 [40:10<02:15,  2.49it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2664/3000:  89%|████████▉ | 2664/3000 [40:10<02:13,  2.52it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2664/3000:  89%|████████▉ | 2664/3000 [40:10<02:13,  2.52it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2665/3000:  89%|████████▉ | 2664/3000 [40:10<02:13,  2.52it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2665/3000:  89%|████████▉ | 2665/3000 [40:11<02:23,  2.33it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2665/3000:  89%|████████▉ | 2665/3000 [40:11<02:23,  2.33it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2666/3000:  89%|████████▉ | 2665/3000 [40:11<02:23,  2.33it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2666/3000:  89%|████████▉ | 2666/3000 [40:11<02:23,  2.33it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2666/3000:  
89%|████████▉ | 2666/3000 [40:11<02:23,  2.33it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.44e+6]Epoch 2667/3000:  89%|████████▉ | 2666/3000 [40:11<02:23,  2.33it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.44e+6]Epoch 2667/3000:  89%|████████▉ | 2667/3000 [40:12<02:27,  2.26it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.44e+6]Epoch 2667/3000:  89%|████████▉ | 2667/3000 [40:12<02:27,  2.26it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2668/3000:  89%|████████▉ | 2667/3000 [40:12<02:27,  2.26it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2668/3000:  89%|████████▉ | 2668/3000 [40:12<02:23,  2.32it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2668/3000:  89%|████████▉ | 2668/3000 [40:12<02:23,  2.32it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2669/3000:  89%|████████▉ | 2668/3000 [40:12<02:23,  2.32it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2669/3000:  89%|████████▉ | 2669/3000 [40:12<02:14,  2.47it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2669/3000:  89%|████████▉ | 2669/3000 [40:12<02:14,  2.47it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6] Epoch 2670/3000:  89%|████████▉ | 2669/3000 [40:12<02:14,  2.47it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6]Epoch 2670/3000:  89%|████████▉ | 2670/3000 [40:13<02:14,  2.46it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6]Epoch 2670/3000:  89%|████████▉ | 2670/3000 [40:13<02:14,  2.46it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2671/3000:  89%|████████▉ | 2670/3000 [40:13<02:14,  2.46it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2671/3000:  89%|████████▉ | 2671/3000 [40:13<02:17,  2.39it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2671/3000:  89%|████████▉ | 2671/3000 [40:13<02:17,  2.39it/s, v_num=1, 
train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2672/3000:  89%|████████▉ | 2671/3000 [40:13<02:17,  2.39it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2672/3000:  89%|████████▉ | 2672/3000 [40:14<02:17,  2.38it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2672/3000:  89%|████████▉ | 2672/3000 [40:14<02:17,  2.38it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2673/3000:  89%|████████▉ | 2672/3000 [40:14<02:17,  2.38it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2673/3000:  89%|████████▉ | 2673/3000 [40:14<02:24,  2.26it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2673/3000:  89%|████████▉ | 2673/3000 [40:14<02:24,  2.26it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2674/3000:  89%|████████▉ | 2673/3000 [40:14<02:24,  2.26it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2674/3000:  89%|████████▉ | 2674/3000 [40:15<02:33,  2.13it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2674/3000:  89%|████████▉ | 2674/3000 [40:15<02:33,  2.13it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2675/3000:  89%|████████▉ | 2674/3000 [40:15<02:33,  2.13it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2675/3000:  89%|████████▉ | 2675/3000 [40:15<02:37,  2.06it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2675/3000:  89%|████████▉ | 2675/3000 [40:15<02:37,  2.06it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2676/3000:  89%|████████▉ | 2675/3000 [40:15<02:37,  2.06it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2676/3000:  89%|████████▉ | 2676/3000 [40:16<02:33,  2.11it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2676/3000:  89%|████████▉ | 2676/3000 [40:16<02:33,  2.11it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2677/3000:  
89%|████████▉ | 2676/3000 [40:16<02:33,  2.11it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2677/3000:  89%|████████▉ | 2677/3000 [40:16<02:33,  2.11it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2677/3000:  89%|████████▉ | 2677/3000 [40:16<02:33,  2.11it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2678/3000:  89%|████████▉ | 2677/3000 [40:16<02:33,  2.11it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2678/3000:  89%|████████▉ | 2678/3000 [40:17<02:35,  2.07it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2678/3000:  89%|████████▉ | 2678/3000 [40:17<02:35,  2.07it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2679/3000:  89%|████████▉ | 2678/3000 [40:17<02:35,  2.07it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2679/3000:  89%|████████▉ | 2679/3000 [40:17<02:36,  2.05it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2679/3000:  89%|████████▉ | 2679/3000 [40:17<02:36,  2.05it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2680/3000:  89%|████████▉ | 2679/3000 [40:17<02:36,  2.05it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2680/3000:  89%|████████▉ | 2680/3000 [40:18<02:32,  2.10it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2680/3000:  89%|████████▉ | 2680/3000 [40:18<02:32,  2.10it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2681/3000:  89%|████████▉ | 2680/3000 [40:18<02:32,  2.10it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2681/3000:  89%|████████▉ | 2681/3000 [40:18<02:30,  2.12it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2681/3000:  89%|████████▉ | 2681/3000 [40:18<02:30,  2.12it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2682/3000:  89%|████████▉ | 2681/3000 [40:18<02:30,  2.12it/s, v_num=1, 
train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2682/3000:  89%|████████▉ | 2682/3000 [40:19<02:23,  2.21it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2682/3000:  89%|████████▉ | 2682/3000 [40:19<02:23,  2.21it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2683/3000:  89%|████████▉ | 2682/3000 [40:19<02:23,  2.21it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2683/3000:  89%|████████▉ | 2683/3000 [40:19<02:25,  2.18it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2683/3000:  89%|████████▉ | 2683/3000 [40:19<02:25,  2.18it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2684/3000:  89%|████████▉ | 2683/3000 [40:19<02:25,  2.18it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2684/3000:  89%|████████▉ | 2684/3000 [40:19<02:22,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2684/3000:  89%|████████▉ | 2684/3000 [40:19<02:22,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2685/3000:  89%|████████▉ | 2684/3000 [40:19<02:22,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2685/3000:  90%|████████▉ | 2685/3000 [40:20<02:17,  2.29it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2685/3000:  90%|████████▉ | 2685/3000 [40:20<02:17,  2.29it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6] Epoch 2686/3000:  90%|████████▉ | 2685/3000 [40:20<02:17,  2.29it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6]Epoch 2686/3000:  90%|████████▉ | 2686/3000 [40:20<02:20,  2.23it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6]Epoch 2686/3000:  90%|████████▉ | 2686/3000 [40:20<02:20,  2.23it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2687/3000:  90%|████████▉ | 2686/3000 [40:20<02:20,  2.23it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2687/3000:  90%|████████▉ 
| 2687/3000 [40:21<02:22,  2.19it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2687/3000:  90%|████████▉ | 2687/3000 [40:21<02:22,  2.19it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2688/3000:  90%|████████▉ | 2687/3000 [40:21<02:22,  2.19it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2688/3000:  90%|████████▉ | 2688/3000 [40:21<02:19,  2.23it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2688/3000:  90%|████████▉ | 2688/3000 [40:21<02:19,  2.23it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2689/3000:  90%|████████▉ | 2688/3000 [40:21<02:19,  2.23it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2689/3000:  90%|████████▉ | 2689/3000 [40:22<02:24,  2.15it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2689/3000:  90%|████████▉ | 2689/3000 [40:22<02:24,  2.15it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2690/3000:  90%|████████▉ | 2689/3000 [40:22<02:24,  2.15it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2690/3000:  90%|████████▉ | 2690/3000 [40:22<02:21,  2.19it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2690/3000:  90%|████████▉ | 2690/3000 [40:22<02:21,  2.19it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2691/3000:  90%|████████▉ | 2690/3000 [40:22<02:21,  2.19it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2691/3000:  90%|████████▉ | 2691/3000 [40:23<02:24,  2.13it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2691/3000:  90%|████████▉ | 2691/3000 [40:23<02:24,  2.13it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2692/3000:  90%|████████▉ | 2691/3000 [40:23<02:24,  2.13it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2692/3000:  90%|████████▉ | 2692/3000 [40:23<02:20,  2.20it/s, v_num=1, train_loss_step=1.47e+6, 
train_loss_epoch=1.44e+6]Epoch 2692/3000:  90%|████████▉ | 2692/3000 [40:23<02:20,  2.20it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2693/3000:  90%|████████▉ | 2692/3000 [40:23<02:20,  2.20it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2693/3000:  90%|████████▉ | 2693/3000 [40:24<02:18,  2.22it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2693/3000:  90%|████████▉ | 2693/3000 [40:24<02:18,  2.22it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6] Epoch 2694/3000:  90%|████████▉ | 2693/3000 [40:24<02:18,  2.22it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6]Epoch 2694/3000:  90%|████████▉ | 2694/3000 [40:24<02:20,  2.18it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6]Epoch 2694/3000:  90%|████████▉ | 2694/3000 [40:24<02:20,  2.18it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2695/3000:  90%|████████▉ | 2694/3000 [40:24<02:20,  2.18it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2695/3000:  90%|████████▉ | 2695/3000 [40:24<02:13,  2.29it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.44e+6]Epoch 2695/3000:  90%|████████▉ | 2695/3000 [40:24<02:13,  2.29it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2696/3000:  90%|████████▉ | 2695/3000 [40:24<02:13,  2.29it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2696/3000:  90%|████████▉ | 2696/3000 [40:25<02:12,  2.29it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2696/3000:  90%|████████▉ | 2696/3000 [40:25<02:12,  2.29it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6] Epoch 2697/3000:  90%|████████▉ | 2696/3000 [40:25<02:12,  2.29it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6]Epoch 2697/3000:  90%|████████▉ | 2697/3000 [40:25<02:16,  2.22it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6]Epoch 2697/3000:  90%|████████▉ | 2697/3000 [40:25<02:16,  
2.22it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2698/3000:  90%|████████▉ | 2697/3000 [40:25<02:16,  2.22it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2698/3000:  90%|████████▉ | 2698/3000 [40:26<02:21,  2.13it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2698/3000:  90%|████████▉ | 2698/3000 [40:26<02:21,  2.13it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2699/3000:  90%|████████▉ | 2698/3000 [40:26<02:21,  2.13it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2699/3000:  90%|████████▉ | 2699/3000 [40:26<02:20,  2.14it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2699/3000:  90%|████████▉ | 2699/3000 [40:26<02:20,  2.14it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2700/3000:  90%|████████▉ | 2699/3000 [40:26<02:20,  2.14it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2700/3000:  90%|█████████ | 2700/3000 [40:27<02:24,  2.07it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2700/3000:  90%|█████████ | 2700/3000 [40:27<02:24,  2.07it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2701/3000:  90%|█████████ | 2700/3000 [40:27<02:24,  2.07it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2701/3000:  90%|█████████ | 2701/3000 [40:27<02:19,  2.14it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2701/3000:  90%|█████████ | 2701/3000 [40:27<02:19,  2.14it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2702/3000:  90%|█████████ | 2701/3000 [40:27<02:19,  2.14it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2702/3000:  90%|█████████ | 2702/3000 [40:28<02:18,  2.15it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2702/3000:  90%|█████████ | 2702/3000 [40:28<02:18,  2.15it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6] Epoch 
2703/3000:  90%|█████████ | 2702/3000 [40:28<02:18,  2.15it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6]Epoch 2703/3000:  90%|█████████ | 2703/3000 [40:28<02:20,  2.11it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6]Epoch 2703/3000:  90%|█████████ | 2703/3000 [40:28<02:20,  2.11it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2704/3000:  90%|█████████ | 2703/3000 [40:28<02:20,  2.11it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2704/3000:  90%|█████████ | 2704/3000 [40:29<02:13,  2.21it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2704/3000:  90%|█████████ | 2704/3000 [40:29<02:13,  2.21it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2705/3000:  90%|█████████ | 2704/3000 [40:29<02:13,  2.21it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2705/3000:  90%|█████████ | 2705/3000 [40:29<02:14,  2.20it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2705/3000:  90%|█████████ | 2705/3000 [40:29<02:14,  2.20it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2706/3000:  90%|█████████ | 2705/3000 [40:29<02:14,  2.20it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2706/3000:  90%|█████████ | 2706/3000 [40:29<02:09,  2.26it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2706/3000:  90%|█████████ | 2706/3000 [40:29<02:09,  2.26it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2707/3000:  90%|█████████ | 2706/3000 [40:29<02:09,  2.26it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2707/3000:  90%|█████████ | 2707/3000 [40:30<02:14,  2.18it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2707/3000:  90%|█████████ | 2707/3000 [40:30<02:14,  2.18it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2708/3000:  90%|█████████ | 2707/3000 [40:30<02:14,  2.18it/s, v_num=1, 
train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2708/3000:  90%|█████████ | 2708/3000 [40:30<02:11,  2.22it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2708/3000:  90%|█████████ | 2708/3000 [40:30<02:11,  2.22it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2709/3000:  90%|█████████ | 2708/3000 [40:30<02:11,  2.22it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2709/3000:  90%|█████████ | 2709/3000 [40:31<02:15,  2.15it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2709/3000:  90%|█████████ | 2709/3000 [40:31<02:15,  2.15it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2710/3000:  90%|█████████ | 2709/3000 [40:31<02:15,  2.15it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2710/3000:  90%|█████████ | 2710/3000 [40:31<02:06,  2.29it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2710/3000:  90%|█████████ | 2710/3000 [40:31<02:06,  2.29it/s, v_num=1, train_loss_step=1.38e+6, train_loss_epoch=1.44e+6]Epoch 2711/3000:  90%|█████████ | 2710/3000 [40:31<02:06,  2.29it/s, v_num=1, train_loss_step=1.38e+6, train_loss_epoch=1.44e+6]Epoch 2711/3000:  90%|█████████ | 2711/3000 [40:32<02:08,  2.25it/s, v_num=1, train_loss_step=1.38e+6, train_loss_epoch=1.44e+6]Epoch 2711/3000:  90%|█████████ | 2711/3000 [40:32<02:08,  2.25it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2712/3000:  90%|█████████ | 2711/3000 [40:32<02:08,  2.25it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2712/3000:  90%|█████████ | 2712/3000 [40:32<02:04,  2.30it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2712/3000:  90%|█████████ | 2712/3000 [40:32<02:04,  2.30it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2713/3000:  90%|█████████ | 2712/3000 [40:32<02:04,  2.30it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2713/3000:  
90%|█████████ | 2713/3000 [40:33<02:12,  2.17it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2713/3000:  90%|█████████ | 2713/3000 [40:33<02:12,  2.17it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2714/3000:  90%|█████████ | 2713/3000 [40:33<02:12,  2.17it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2714/3000:  90%|█████████ | 2714/3000 [40:33<02:15,  2.11it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2714/3000:  90%|█████████ | 2714/3000 [40:33<02:15,  2.11it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2715/3000:  90%|█████████ | 2714/3000 [40:33<02:15,  2.11it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2715/3000:  90%|█████████ | 2715/3000 [40:34<02:12,  2.14it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2715/3000:  90%|█████████ | 2715/3000 [40:34<02:12,  2.14it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2716/3000:  90%|█████████ | 2715/3000 [40:34<02:12,  2.14it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2716/3000:  91%|█████████ | 2716/3000 [40:34<02:07,  2.23it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2716/3000:  91%|█████████ | 2716/3000 [40:34<02:07,  2.23it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2717/3000:  91%|█████████ | 2716/3000 [40:34<02:07,  2.23it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2717/3000:  91%|█████████ | 2717/3000 [40:35<02:11,  2.14it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2717/3000:  91%|█████████ | 2717/3000 [40:35<02:11,  2.14it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.44e+6]Epoch 2718/3000:  91%|█████████ | 2717/3000 [40:35<02:11,  2.14it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.44e+6]Epoch 2718/3000:  91%|█████████ | 2718/3000 [40:35<02:10,  2.16it/s, v_num=1, 
train_loss_step=1.39e+6, train_loss_epoch=1.44e+6]Epoch 2718/3000:  91%|█████████ | 2718/3000 [40:35<02:10,  2.16it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2719/3000:  91%|█████████ | 2718/3000 [40:35<02:10,  2.16it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2719/3000:  91%|█████████ | 2719/3000 [40:36<03:06,  1.50it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2719/3000:  91%|█████████ | 2719/3000 [40:36<03:06,  1.50it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2720/3000:  91%|█████████ | 2719/3000 [40:36<03:06,  1.50it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2720/3000:  91%|█████████ | 2720/3000 [40:37<03:01,  1.54it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2720/3000:  91%|█████████ | 2720/3000 [40:37<03:01,  1.54it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2721/3000:  91%|█████████ | 2720/3000 [40:37<03:01,  1.54it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2721/3000:  91%|█████████ | 2721/3000 [40:37<02:51,  1.62it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2721/3000:  91%|█████████ | 2721/3000 [40:37<02:51,  1.62it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6] Epoch 2722/3000:  91%|█████████ | 2721/3000 [40:37<02:51,  1.62it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6]Epoch 2722/3000:  91%|█████████ | 2722/3000 [40:38<02:42,  1.71it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6]Epoch 2722/3000:  91%|█████████ | 2722/3000 [40:38<02:42,  1.71it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2723/3000:  91%|█████████ | 2722/3000 [40:38<02:42,  1.71it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2723/3000:  91%|█████████ | 2723/3000 [40:38<02:45,  1.67it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2723/3000:  91%|█████████ 
| 2723/3000 [40:38<02:45,  1.67it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2724/3000:  91%|█████████ | 2723/3000 [40:38<02:45,  1.67it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2724/3000:  91%|█████████ | 2724/3000 [40:39<02:40,  1.72it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2724/3000:  91%|█████████ | 2724/3000 [40:39<02:40,  1.72it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2725/3000:  91%|█████████ | 2724/3000 [40:39<02:40,  1.72it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2725/3000:  91%|█████████ | 2725/3000 [40:40<02:39,  1.72it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2725/3000:  91%|█████████ | 2725/3000 [40:40<02:39,  1.72it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2726/3000:  91%|█████████ | 2725/3000 [40:40<02:39,  1.72it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2726/3000:  91%|█████████ | 2726/3000 [40:40<02:31,  1.81it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2726/3000:  91%|█████████ | 2726/3000 [40:40<02:31,  1.81it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2727/3000:  91%|█████████ | 2726/3000 [40:40<02:31,  1.81it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2727/3000:  91%|█████████ | 2727/3000 [40:40<02:22,  1.92it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2727/3000:  91%|█████████ | 2727/3000 [40:40<02:22,  1.92it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2728/3000:  91%|█████████ | 2727/3000 [40:40<02:22,  1.92it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2728/3000:  91%|█████████ | 2728/3000 [45:20<6:22:28, 84.37s/it, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2728/3000:  91%|█████████ | 2728/3000 [45:20<6:22:28, 84.37s/it, v_num=1, train_loss_step=1.45e+6, 
train_loss_epoch=1.44e+6]Epoch 2729/3000:  91%|█████████ | 2728/3000 [45:21<6:22:28, 84.37s/it, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2729/3000:  91%|█████████ | 2729/3000 [45:21<4:27:25, 59.21s/it, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2729/3000:  91%|█████████ | 2729/3000 [45:21<4:27:25, 59.21s/it, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2730/3000:  91%|█████████ | 2729/3000 [45:21<4:27:25, 59.21s/it, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2730/3000:  91%|█████████ | 2730/3000 [45:22<3:07:14, 41.61s/it, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2730/3000:  91%|█████████ | 2730/3000 [45:22<3:07:14, 41.61s/it, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2731/3000:  91%|█████████ | 2730/3000 [45:22<3:07:14, 41.61s/it, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2731/3000:  91%|█████████ | 2731/3000 [45:22<2:11:13, 29.27s/it, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2731/3000:  91%|█████████ | 2731/3000 [45:22<2:11:13, 29.27s/it, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2732/3000:  91%|█████████ | 2731/3000 [45:22<2:11:13, 29.27s/it, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2732/3000:  91%|█████████ | 2732/3000 [45:22<1:32:10, 20.64s/it, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2732/3000:  91%|█████████ | 2732/3000 [45:22<1:32:10, 20.64s/it, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2733/3000:  91%|█████████ | 2732/3000 [45:22<1:32:10, 20.64s/it, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2733/3000:  91%|█████████ | 2733/3000 [45:23<1:04:55, 14.59s/it, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2733/3000:  91%|█████████ | 2733/3000 [45:23<1:04:55, 14.59s/it, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2734/3000:  
91%|█████████ | 2733/3000 [45:23<1:04:55, 14.59s/it, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2734/3000:  91%|█████████ | 2734/3000 [45:23<45:54, 10.35s/it, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]  Epoch 2734/3000:  91%|█████████ | 2734/3000 [45:23<45:54, 10.35s/it, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2735/3000:  91%|█████████ | 2734/3000 [45:23<45:54, 10.35s/it, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2735/3000:  91%|█████████ | 2735/3000 [45:24<32:45,  7.42s/it, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2735/3000:  91%|█████████ | 2735/3000 [45:24<32:45,  7.42s/it, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.44e+6]Epoch 2736/3000:  91%|█████████ | 2735/3000 [45:24<32:45,  7.42s/it, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.44e+6]Epoch 2736/3000:  91%|█████████ | 2736/3000 [45:25<23:30,  5.34s/it, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.44e+6]Epoch 2736/3000:  91%|█████████ | 2736/3000 [45:25<23:30,  5.34s/it, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2737/3000:  91%|█████████ | 2736/3000 [45:25<23:30,  5.34s/it, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2737/3000:  91%|█████████ | 2737/3000 [45:25<17:04,  3.89s/it, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2737/3000:  91%|█████████ | 2737/3000 [45:25<17:04,  3.89s/it, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6] Epoch 2738/3000:  91%|█████████ | 2737/3000 [45:25<17:04,  3.89s/it, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6]Epoch 2738/3000:  91%|█████████▏| 2738/3000 [45:26<12:39,  2.90s/it, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.44e+6]Epoch 2738/3000:  91%|█████████▏| 2738/3000 [45:26<12:39,  2.90s/it, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2739/3000:  91%|█████████▏| 2738/3000 [45:26<12:39,  2.90s/it, v_num=1, 
train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2739/3000:  91%|█████████▏| 2739/3000 [45:26<09:25,  2.16s/it, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.44e+6]Epoch 2739/3000:  91%|█████████▏| 2739/3000 [45:26<09:25,  2.16s/it, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2740/3000:  91%|█████████▏| 2739/3000 [45:26<09:25,  2.16s/it, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2740/3000:  91%|█████████▏| 2740/3000 [45:27<07:19,  1.69s/it, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2740/3000:  91%|█████████▏| 2740/3000 [45:27<07:19,  1.69s/it, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2741/3000:  91%|█████████▏| 2740/3000 [45:27<07:19,  1.69s/it, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2741/3000:  91%|█████████▏| 2741/3000 [45:27<05:39,  1.31s/it, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2741/3000:  91%|█████████▏| 2741/3000 [45:27<05:39,  1.31s/it, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2742/3000:  91%|█████████▏| 2741/3000 [45:27<05:39,  1.31s/it, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2742/3000:  91%|█████████▏| 2742/3000 [45:28<04:34,  1.06s/it, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2742/3000:  91%|█████████▏| 2742/3000 [45:28<04:34,  1.06s/it, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2743/3000:  91%|█████████▏| 2742/3000 [45:28<04:34,  1.06s/it, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2743/3000:  91%|█████████▏| 2743/3000 [45:28<03:49,  1.12it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2743/3000:  91%|█████████▏| 2743/3000 [45:28<03:49,  1.12it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2744/3000:  91%|█████████▏| 2743/3000 [45:28<03:49,  1.12it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2744/3000:  
91%|█████████▏| 2744/3000 [45:29<03:15,  1.31it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2744/3000:  91%|█████████▏| 2744/3000 [45:29<03:15,  1.31it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.44e+6]Epoch 2745/3000:  91%|█████████▏| 2744/3000 [45:29<03:15,  1.31it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.44e+6]Epoch 2745/3000:  92%|█████████▏| 2745/3000 [45:29<02:53,  1.47it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.44e+6]Epoch 2745/3000:  92%|█████████▏| 2745/3000 [45:29<02:53,  1.47it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2746/3000:  92%|█████████▏| 2745/3000 [45:29<02:53,  1.47it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2746/3000:  92%|█████████▏| 2746/3000 [45:29<02:35,  1.64it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.44e+6]Epoch 2746/3000:  92%|█████████▏| 2746/3000 [45:29<02:35,  1.64it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2747/3000:  92%|█████████▏| 2746/3000 [45:29<02:35,  1.64it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2747/3000:  92%|█████████▏| 2747/3000 [45:30<02:26,  1.73it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2747/3000:  92%|█████████▏| 2747/3000 [45:30<02:26,  1.73it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2748/3000:  92%|█████████▏| 2747/3000 [45:30<02:26,  1.73it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2748/3000:  92%|█████████▏| 2748/3000 [45:31<02:26,  1.72it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2748/3000:  92%|█████████▏| 2748/3000 [45:31<02:26,  1.72it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2749/3000:  92%|█████████▏| 2748/3000 [45:31<02:26,  1.72it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2749/3000:  92%|█████████▏| 2749/3000 [45:31<02:21,  1.78it/s, v_num=1, 
train_loss_step=1.46e+6, train_loss_epoch=1.44e+6]Epoch 2749/3000:  92%|█████████▏| 2749/3000 [45:31<02:21,  1.78it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2750/3000:  92%|█████████▏| 2749/3000 [45:31<02:21,  1.78it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2750/3000:  92%|█████████▏| 2750/3000 [45:32<02:24,  1.74it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2750/3000:  92%|█████████▏| 2750/3000 [45:32<02:24,  1.74it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.44e+6]Epoch 2751/3000:  92%|█████████▏| 2750/3000 [45:32<02:24,  1.74it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.44e+6]Epoch 2751/3000:  92%|█████████▏| 2751/3000 [45:32<02:25,  1.71it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.44e+6]Epoch 2751/3000:  92%|█████████▏| 2751/3000 [45:32<02:25,  1.71it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2752/3000:  92%|█████████▏| 2751/3000 [45:32<02:25,  1.71it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2752/3000:  92%|█████████▏| 2752/3000 [45:33<02:15,  1.83it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2752/3000:  92%|█████████▏| 2752/3000 [45:33<02:15,  1.83it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2753/3000:  92%|█████████▏| 2752/3000 [45:33<02:15,  1.83it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2753/3000:  92%|█████████▏| 2753/3000 [45:33<02:08,  1.93it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.44e+6]Epoch 2753/3000:  92%|█████████▏| 2753/3000 [45:33<02:08,  1.93it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2754/3000:  92%|█████████▏| 2753/3000 [45:33<02:08,  1.93it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2754/3000:  92%|█████████▏| 2754/3000 [45:34<02:12,  1.85it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.44e+6]Epoch 2754/3000:  
92%|█████████▏| 2754/3000 [45:34<02:12,  1.85it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2755/3000:  92%|█████████▏| 2754/3000 [45:34<02:12,  1.85it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2755/3000:  92%|█████████▏| 2755/3000 [45:34<02:09,  1.89it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.44e+6]Epoch 2755/3000:  92%|█████████▏| 2755/3000 [45:34<02:09,  1.89it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2756/3000:  92%|█████████▏| 2755/3000 [45:34<02:09,  1.89it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2756/3000:  92%|█████████▏| 2756/3000 [45:35<02:09,  1.88it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.44e+6]Epoch 2756/3000:  92%|█████████▏| 2756/3000 [45:35<02:09,  1.88it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2757/3000:  92%|█████████▏| 2756/3000 [45:35<02:09,  1.88it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2757/3000:  92%|█████████▏| 2757/3000 [45:35<02:07,  1.91it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2757/3000:  92%|█████████▏| 2757/3000 [45:35<02:07,  1.91it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2758/3000:  92%|█████████▏| 2757/3000 [45:35<02:07,  1.91it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2758/3000:  92%|█████████▏| 2758/3000 [45:36<02:09,  1.87it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2758/3000:  92%|█████████▏| 2758/3000 [45:36<02:09,  1.87it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2759/3000:  92%|█████████▏| 2758/3000 [45:36<02:09,  1.87it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2759/3000:  92%|█████████▏| 2759/3000 [45:37<02:22,  1.69it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2759/3000:  92%|█████████▏| 2759/3000 [45:37<02:22,  1.69it/s, v_num=1, 
train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2760/3000:  92%|█████████▏| 2759/3000 [45:37<02:22,  1.69it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2760/3000:  92%|█████████▏| 2760/3000 [45:37<02:27,  1.63it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2760/3000:  92%|█████████▏| 2760/3000 [45:37<02:27,  1.63it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2761/3000:  92%|█████████▏| 2760/3000 [45:37<02:27,  1.63it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2761/3000:  92%|█████████▏| 2761/3000 [45:38<02:21,  1.69it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2761/3000:  92%|█████████▏| 2761/3000 [45:38<02:21,  1.69it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2762/3000:  92%|█████████▏| 2761/3000 [45:38<02:21,  1.69it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2762/3000:  92%|█████████▏| 2762/3000 [45:39<02:29,  1.59it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2762/3000:  92%|█████████▏| 2762/3000 [45:39<02:29,  1.59it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2763/3000:  92%|█████████▏| 2762/3000 [45:39<02:29,  1.59it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2763/3000:  92%|█████████▏| 2763/3000 [45:39<02:22,  1.66it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2763/3000:  92%|█████████▏| 2763/3000 [45:39<02:22,  1.66it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2764/3000:  92%|█████████▏| 2763/3000 [45:39<02:22,  1.66it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2764/3000:  92%|█████████▏| 2764/3000 [45:40<02:30,  1.57it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2764/3000:  92%|█████████▏| 2764/3000 [45:40<02:30,  1.57it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2765/3000:  
92%|█████████▏| 2764/3000 [45:40<02:30,  1.57it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2765/3000:  92%|█████████▏| 2765/3000 [45:40<02:22,  1.65it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2765/3000:  92%|█████████▏| 2765/3000 [45:40<02:22,  1.65it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2766/3000:  92%|█████████▏| 2765/3000 [45:40<02:22,  1.65it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2766/3000:  92%|█████████▏| 2766/3000 [45:41<02:08,  1.83it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2766/3000:  92%|█████████▏| 2766/3000 [45:41<02:08,  1.83it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.43e+6]Epoch 2767/3000:  92%|█████████▏| 2766/3000 [45:41<02:08,  1.83it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.43e+6]Epoch 2767/3000:  92%|█████████▏| 2767/3000 [45:41<02:06,  1.85it/s, v_num=1, train_loss_step=1.52e+6, train_loss_epoch=1.43e+6]Epoch 2767/3000:  92%|█████████▏| 2767/3000 [45:41<02:06,  1.85it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2768/3000:  92%|█████████▏| 2767/3000 [45:41<02:06,  1.85it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2768/3000:  92%|█████████▏| 2768/3000 [45:42<02:05,  1.85it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2768/3000:  92%|█████████▏| 2768/3000 [45:42<02:05,  1.85it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2769/3000:  92%|█████████▏| 2768/3000 [45:42<02:05,  1.85it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2769/3000:  92%|█████████▏| 2769/3000 [45:42<02:09,  1.79it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2769/3000:  92%|█████████▏| 2769/3000 [45:42<02:09,  1.79it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2770/3000:  92%|█████████▏| 2769/3000 [45:42<02:09,  1.79it/s, v_num=1, 
train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2770/3000:  92%|█████████▏| 2770/3000 [45:43<02:09,  1.77it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2770/3000:  92%|█████████▏| 2770/3000 [45:43<02:09,  1.77it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2771/3000:  92%|█████████▏| 2770/3000 [45:43<02:09,  1.77it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2771/3000:  92%|█████████▏| 2771/3000 [45:43<02:03,  1.86it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2771/3000:  92%|█████████▏| 2771/3000 [45:43<02:03,  1.86it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2772/3000:  92%|█████████▏| 2771/3000 [45:43<02:03,  1.86it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2772/3000:  92%|█████████▏| 2772/3000 [45:44<02:00,  1.89it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2772/3000:  92%|█████████▏| 2772/3000 [45:44<02:00,  1.89it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2773/3000:  92%|█████████▏| 2772/3000 [45:44<02:00,  1.89it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2773/3000:  92%|█████████▏| 2773/3000 [45:44<01:58,  1.92it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2773/3000:  92%|█████████▏| 2773/3000 [45:44<01:58,  1.92it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2774/3000:  92%|█████████▏| 2773/3000 [45:44<01:58,  1.92it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2774/3000:  92%|█████████▏| 2774/3000 [45:45<01:54,  1.97it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2774/3000:  92%|█████████▏| 2774/3000 [45:45<01:54,  1.97it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2775/3000:  92%|█████████▏| 2774/3000 [45:45<01:54,  1.97it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2775/3000:  
92%|█████████▎| 2775/3000 [45:45<01:57,  1.92it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2775/3000:  92%|█████████▎| 2775/3000 [45:45<01:57,  1.92it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2776/3000:  92%|█████████▎| 2775/3000 [45:45<01:57,  1.92it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2776/3000:  93%|█████████▎| 2776/3000 [45:46<01:58,  1.89it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2776/3000:  93%|█████████▎| 2776/3000 [45:46<01:58,  1.89it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2777/3000:  93%|█████████▎| 2776/3000 [45:46<01:58,  1.89it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2777/3000:  93%|█████████▎| 2777/3000 [45:47<02:04,  1.79it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2777/3000:  93%|█████████▎| 2777/3000 [45:47<02:04,  1.79it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2778/3000:  93%|█████████▎| 2777/3000 [45:47<02:04,  1.79it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2778/3000:  93%|█████████▎| 2778/3000 [45:47<02:01,  1.83it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2778/3000:  93%|█████████▎| 2778/3000 [45:47<02:01,  1.83it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2779/3000:  93%|█████████▎| 2778/3000 [45:47<02:01,  1.83it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2779/3000:  93%|█████████▎| 2779/3000 [45:48<02:04,  1.78it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2779/3000:  93%|█████████▎| 2779/3000 [45:48<02:04,  1.78it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2780/3000:  93%|█████████▎| 2779/3000 [45:48<02:04,  1.78it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2780/3000:  93%|█████████▎| 2780/3000 [45:48<01:58,  1.86it/s, v_num=1, 
train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2780/3000:  93%|█████████▎| 2780/3000 [45:48<01:58,  1.86it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2781/3000:  93%|█████████▎| 2780/3000 [45:48<01:58,  1.86it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2781/3000:  93%|█████████▎| 2781/3000 [45:49<01:52,  1.95it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2781/3000:  93%|█████████▎| 2781/3000 [45:49<01:52,  1.95it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2782/3000:  93%|█████████▎| 2781/3000 [45:49<01:52,  1.95it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2782/3000:  93%|█████████▎| 2782/3000 [45:49<01:47,  2.03it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2782/3000:  93%|█████████▎| 2782/3000 [45:49<01:47,  2.03it/s, v_num=1, train_loss_step=1.38e+6, train_loss_epoch=1.43e+6]Epoch 2783/3000:  93%|█████████▎| 2782/3000 [45:49<01:47,  2.03it/s, v_num=1, train_loss_step=1.38e+6, train_loss_epoch=1.43e+6]Epoch 2783/3000:  93%|█████████▎| 2783/3000 [45:50<01:45,  2.05it/s, v_num=1, train_loss_step=1.38e+6, train_loss_epoch=1.43e+6]Epoch 2783/3000:  93%|█████████▎| 2783/3000 [45:50<01:45,  2.05it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.43e+6]Epoch 2784/3000:  93%|█████████▎| 2783/3000 [45:50<01:45,  2.05it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.43e+6]Epoch 2784/3000:  93%|█████████▎| 2784/3000 [45:50<01:44,  2.07it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.43e+6]Epoch 2784/3000:  93%|█████████▎| 2784/3000 [45:50<01:44,  2.07it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2785/3000:  93%|█████████▎| 2784/3000 [45:50<01:44,  2.07it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2785/3000:  93%|█████████▎| 2785/3000 [45:51<01:45,  2.05it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2785/3000:  
93%|█████████▎| 2785/3000 [45:51<01:45,  2.05it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2786/3000:  93%|█████████▎| 2785/3000 [45:51<01:45,  2.05it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2786/3000:  93%|█████████▎| 2786/3000 [45:51<01:39,  2.15it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2786/3000:  93%|█████████▎| 2786/3000 [45:51<01:39,  2.15it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2787/3000:  93%|█████████▎| 2786/3000 [45:51<01:39,  2.15it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2787/3000:  93%|█████████▎| 2787/3000 [45:51<01:30,  2.35it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2787/3000:  93%|█████████▎| 2787/3000 [45:51<01:30,  2.35it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2788/3000:  93%|█████████▎| 2787/3000 [45:51<01:30,  2.35it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2788/3000:  93%|█████████▎| 2788/3000 [45:52<01:35,  2.23it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2788/3000:  93%|█████████▎| 2788/3000 [45:52<01:35,  2.23it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2789/3000:  93%|█████████▎| 2788/3000 [45:52<01:35,  2.23it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2789/3000:  93%|█████████▎| 2789/3000 [45:52<01:38,  2.14it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2789/3000:  93%|█████████▎| 2789/3000 [45:52<01:38,  2.14it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2790/3000:  93%|█████████▎| 2789/3000 [45:52<01:38,  2.14it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2790/3000:  93%|█████████▎| 2790/3000 [45:53<01:38,  2.13it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2790/3000:  93%|█████████▎| 2790/3000 [45:53<01:38,  2.13it/s, v_num=1, 
train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2791/3000:  93%|█████████▎| 2790/3000 [45:53<01:38,  2.13it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2791/3000:  93%|█████████▎| 2791/3000 [45:53<01:37,  2.15it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2791/3000:  93%|█████████▎| 2791/3000 [45:53<01:37,  2.15it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2792/3000:  93%|█████████▎| 2791/3000 [45:53<01:37,  2.15it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2792/3000:  93%|█████████▎| 2792/3000 [45:54<01:39,  2.09it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2792/3000:  93%|█████████▎| 2792/3000 [45:54<01:39,  2.09it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2793/3000:  93%|█████████▎| 2792/3000 [45:54<01:39,  2.09it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2793/3000:  93%|█████████▎| 2793/3000 [45:54<01:35,  2.17it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2793/3000:  93%|█████████▎| 2793/3000 [45:54<01:35,  2.17it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2794/3000:  93%|█████████▎| 2793/3000 [45:54<01:35,  2.17it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2794/3000:  93%|█████████▎| 2794/3000 [45:55<01:36,  2.14it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2794/3000:  93%|█████████▎| 2794/3000 [45:55<01:36,  2.14it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2795/3000:  93%|█████████▎| 2794/3000 [45:55<01:36,  2.14it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2795/3000:  93%|█████████▎| 2795/3000 [45:55<01:38,  2.08it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2795/3000:  93%|█████████▎| 2795/3000 [45:55<01:38,  2.08it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2796/3000:  
93%|█████████▎| 2795/3000 [45:55<01:38,  2.08it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2796/3000:  93%|█████████▎| 2796/3000 [45:56<01:40,  2.03it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2796/3000:  93%|█████████▎| 2796/3000 [45:56<01:40,  2.03it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2797/3000:  93%|█████████▎| 2796/3000 [45:56<01:40,  2.03it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2797/3000:  93%|█████████▎| 2797/3000 [45:56<01:37,  2.09it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2797/3000:  93%|█████████▎| 2797/3000 [45:56<01:37,  2.09it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2798/3000:  93%|█████████▎| 2797/3000 [45:56<01:37,  2.09it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2798/3000:  93%|█████████▎| 2798/3000 [45:57<01:33,  2.16it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2798/3000:  93%|█████████▎| 2798/3000 [45:57<01:33,  2.16it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2799/3000:  93%|█████████▎| 2798/3000 [45:57<01:33,  2.16it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2799/3000:  93%|█████████▎| 2799/3000 [45:57<01:35,  2.11it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2799/3000:  93%|█████████▎| 2799/3000 [45:57<01:35,  2.11it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2800/3000:  93%|█████████▎| 2799/3000 [45:57<01:35,  2.11it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2800/3000:  93%|█████████▎| 2800/3000 [45:58<01:38,  2.04it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2800/3000:  93%|█████████▎| 2800/3000 [45:58<01:38,  2.04it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2801/3000:  93%|█████████▎| 2800/3000 [45:58<01:38,  2.04it/s, v_num=1, 
train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2801/3000:  93%|█████████▎| 2801/3000 [45:58<01:41,  1.96it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2801/3000:  93%|█████████▎| 2801/3000 [45:58<01:41,  1.96it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2802/3000:  93%|█████████▎| 2801/3000 [45:58<01:41,  1.96it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2802/3000:  93%|█████████▎| 2802/3000 [45:59<01:37,  2.03it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2802/3000:  93%|█████████▎| 2802/3000 [45:59<01:37,  2.03it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2803/3000:  93%|█████████▎| 2802/3000 [45:59<01:37,  2.03it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2803/3000:  93%|█████████▎| 2803/3000 [45:59<01:33,  2.12it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2803/3000:  93%|█████████▎| 2803/3000 [45:59<01:33,  2.12it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2804/3000:  93%|█████████▎| 2803/3000 [45:59<01:33,  2.12it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2804/3000:  93%|█████████▎| 2804/3000 [46:00<01:35,  2.05it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2804/3000:  93%|█████████▎| 2804/3000 [46:00<01:35,  2.05it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2805/3000:  93%|█████████▎| 2804/3000 [46:00<01:35,  2.05it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2805/3000:  94%|█████████▎| 2805/3000 [46:00<01:35,  2.04it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2805/3000:  94%|█████████▎| 2805/3000 [46:00<01:35,  2.04it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2806/3000:  94%|█████████▎| 2805/3000 [46:00<01:35,  2.04it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2806/3000:  94%|█████████▎| 
2806/3000 [46:01<01:38,  1.98it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2806/3000:  94%|█████████▎| 2806/3000 [46:01<01:38,  1.98it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2807/3000:  94%|█████████▎| 2806/3000 [46:01<01:38,  1.98it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2807/3000:  94%|█████████▎| 2807/3000 [46:01<01:32,  2.09it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2807/3000:  94%|█████████▎| 2807/3000 [46:01<01:32,  2.09it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2808/3000:  94%|█████████▎| 2807/3000 [46:01<01:32,  2.09it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2808/3000:  94%|█████████▎| 2808/3000 [46:02<01:31,  2.11it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2808/3000:  94%|█████████▎| 2808/3000 [46:02<01:31,  2.11it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2809/3000:  94%|█████████▎| 2808/3000 [46:02<01:31,  2.11it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2809/3000:  94%|█████████▎| 2809/3000 [46:02<01:29,  2.13it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2809/3000:  94%|█████████▎| 2809/3000 [46:02<01:29,  2.13it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2810/3000:  94%|█████████▎| 2809/3000 [46:02<01:29,  2.13it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2810/3000:  94%|█████████▎| 2810/3000 [46:02<01:28,  2.15it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2810/3000:  94%|█████████▎| 2810/3000 [46:02<01:28,  2.15it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2811/3000:  94%|█████████▎| 2810/3000 [46:02<01:28,  2.15it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2811/3000:  94%|█████████▎| 2811/3000 [46:03<01:25,  2.21it/s, v_num=1, train_loss_step=1.41e+6, 
train_loss_epoch=1.43e+6]Epoch 2811/3000:  94%|█████████▎| 2811/3000 [46:03<01:25,  2.21it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2812/3000:  94%|█████████▎| 2811/3000 [46:03<01:25,  2.21it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2812/3000:  94%|█████████▎| 2812/3000 [46:03<01:31,  2.05it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2812/3000:  94%|█████████▎| 2812/3000 [46:03<01:31,  2.05it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2813/3000:  94%|█████████▎| 2812/3000 [46:03<01:31,  2.05it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2813/3000:  94%|█████████▍| 2813/3000 [46:04<01:28,  2.12it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2813/3000:  94%|█████████▍| 2813/3000 [46:04<01:28,  2.12it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2814/3000:  94%|█████████▍| 2813/3000 [46:04<01:28,  2.12it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2814/3000:  94%|█████████▍| 2814/3000 [46:04<01:26,  2.15it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2814/3000:  94%|█████████▍| 2814/3000 [46:04<01:26,  2.15it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.43e+6]Epoch 2815/3000:  94%|█████████▍| 2814/3000 [46:04<01:26,  2.15it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.43e+6]Epoch 2815/3000:  94%|█████████▍| 2815/3000 [46:05<01:25,  2.18it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.43e+6]Epoch 2815/3000:  94%|█████████▍| 2815/3000 [46:05<01:25,  2.18it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2816/3000:  94%|█████████▍| 2815/3000 [46:05<01:25,  2.18it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2816/3000:  94%|█████████▍| 2816/3000 [46:05<01:25,  2.16it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2816/3000:  94%|█████████▍| 2816/3000 
[46:05<01:25,  2.16it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2817/3000:  94%|█████████▍| 2816/3000 [46:05<01:25,  2.16it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2817/3000:  94%|█████████▍| 2817/3000 [46:06<01:22,  2.21it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2817/3000:  94%|█████████▍| 2817/3000 [46:06<01:22,  2.21it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2818/3000:  94%|█████████▍| 2817/3000 [46:06<01:22,  2.21it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2818/3000:  94%|█████████▍| 2818/3000 [46:06<01:24,  2.14it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2818/3000:  94%|█████████▍| 2818/3000 [46:06<01:24,  2.14it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2819/3000:  94%|█████████▍| 2818/3000 [46:06<01:24,  2.14it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2819/3000:  94%|█████████▍| 2819/3000 [46:07<01:25,  2.11it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2819/3000:  94%|█████████▍| 2819/3000 [46:07<01:25,  2.11it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2820/3000:  94%|█████████▍| 2819/3000 [46:07<01:25,  2.11it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2820/3000:  94%|█████████▍| 2820/3000 [46:07<01:34,  1.91it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2820/3000:  94%|█████████▍| 2820/3000 [46:07<01:34,  1.91it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2821/3000:  94%|█████████▍| 2820/3000 [46:07<01:34,  1.91it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2821/3000:  94%|█████████▍| 2821/3000 [46:08<01:30,  1.98it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2821/3000:  94%|█████████▍| 2821/3000 [46:08<01:30,  1.98it/s, v_num=1, train_loss_step=1.41e+6, 
train_loss_epoch=1.43e+6]Epoch 2822/3000:  94%|█████████▍| 2821/3000 [46:08<01:30,  1.98it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2822/3000:  94%|█████████▍| 2822/3000 [46:08<01:26,  2.05it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2822/3000:  94%|█████████▍| 2822/3000 [46:08<01:26,  2.05it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2823/3000:  94%|█████████▍| 2822/3000 [46:08<01:26,  2.05it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2823/3000:  94%|█████████▍| 2823/3000 [46:09<01:28,  2.00it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2823/3000:  94%|█████████▍| 2823/3000 [46:09<01:28,  2.00it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2824/3000:  94%|█████████▍| 2823/3000 [46:09<01:28,  2.00it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2824/3000:  94%|█████████▍| 2824/3000 [46:09<01:22,  2.13it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2824/3000:  94%|█████████▍| 2824/3000 [46:09<01:22,  2.13it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2825/3000:  94%|█████████▍| 2824/3000 [46:09<01:22,  2.13it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2825/3000:  94%|█████████▍| 2825/3000 [46:10<01:23,  2.09it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2825/3000:  94%|█████████▍| 2825/3000 [46:10<01:23,  2.09it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2826/3000:  94%|█████████▍| 2825/3000 [46:10<01:23,  2.09it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2826/3000:  94%|█████████▍| 2826/3000 [46:10<01:24,  2.06it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2826/3000:  94%|█████████▍| 2826/3000 [46:10<01:24,  2.06it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2827/3000:  94%|█████████▍| 2826/3000 
[46:10<01:24,  2.06it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2827/3000:  94%|█████████▍| 2827/3000 [46:11<01:25,  2.03it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2827/3000:  94%|█████████▍| 2827/3000 [46:11<01:25,  2.03it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2828/3000:  94%|█████████▍| 2827/3000 [46:11<01:25,  2.03it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2828/3000:  94%|█████████▍| 2828/3000 [46:11<01:27,  1.96it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2828/3000:  94%|█████████▍| 2828/3000 [46:11<01:27,  1.96it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2829/3000:  94%|█████████▍| 2828/3000 [46:11<01:27,  1.96it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2829/3000:  94%|█████████▍| 2829/3000 [46:12<01:34,  1.81it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2829/3000:  94%|█████████▍| 2829/3000 [46:12<01:34,  1.81it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2830/3000:  94%|█████████▍| 2829/3000 [46:12<01:34,  1.81it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2830/3000:  94%|█████████▍| 2830/3000 [46:12<01:31,  1.87it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2830/3000:  94%|█████████▍| 2830/3000 [46:12<01:31,  1.87it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2831/3000:  94%|█████████▍| 2830/3000 [46:12<01:31,  1.87it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2831/3000:  94%|█████████▍| 2831/3000 [46:13<01:27,  1.94it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2831/3000:  94%|█████████▍| 2831/3000 [46:13<01:27,  1.94it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2832/3000:  94%|█████████▍| 2831/3000 [46:13<01:27,  1.94it/s, v_num=1, train_loss_step=1.44e+6, 
train_loss_epoch=1.43e+6]Epoch 2832/3000:  94%|█████████▍| 2832/3000 [46:13<01:24,  1.99it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2832/3000:  94%|█████████▍| 2832/3000 [46:13<01:24,  1.99it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2833/3000:  94%|█████████▍| 2832/3000 [46:13<01:24,  1.99it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2833/3000:  94%|█████████▍| 2833/3000 [46:14<01:25,  1.94it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2833/3000:  94%|█████████▍| 2833/3000 [46:14<01:25,  1.94it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2834/3000:  94%|█████████▍| 2833/3000 [46:14<01:25,  1.94it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2834/3000:  94%|█████████▍| 2834/3000 [46:14<01:21,  2.04it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2834/3000:  94%|█████████▍| 2834/3000 [46:14<01:21,  2.04it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2835/3000:  94%|█████████▍| 2834/3000 [46:14<01:21,  2.04it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2835/3000:  94%|█████████▍| 2835/3000 [46:15<01:32,  1.79it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2835/3000:  94%|█████████▍| 2835/3000 [46:15<01:32,  1.79it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2836/3000:  94%|█████████▍| 2835/3000 [46:15<01:32,  1.79it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2836/3000:  95%|█████████▍| 2836/3000 [46:16<01:31,  1.80it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2836/3000:  95%|█████████▍| 2836/3000 [46:16<01:31,  1.80it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2837/3000:  95%|█████████▍| 2836/3000 [46:16<01:31,  1.80it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2837/3000:  95%|█████████▍| 2837/3000 
[46:16<01:27,  1.86it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2837/3000:  95%|█████████▍| 2837/3000 [46:16<01:27,  1.86it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2838/3000:  95%|█████████▍| 2837/3000 [46:16<01:27,  1.86it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2838/3000:  95%|█████████▍| 2838/3000 [46:16<01:24,  1.92it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2838/3000:  95%|█████████▍| 2838/3000 [46:16<01:24,  1.92it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2839/3000:  95%|█████████▍| 2838/3000 [46:16<01:24,  1.92it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2839/3000:  95%|█████████▍| 2839/3000 [46:17<01:20,  2.01it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2839/3000:  95%|█████████▍| 2839/3000 [46:17<01:20,  2.01it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2840/3000:  95%|█████████▍| 2839/3000 [46:17<01:20,  2.01it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2840/3000:  95%|█████████▍| 2840/3000 [46:17<01:19,  2.02it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2840/3000:  95%|█████████▍| 2840/3000 [46:17<01:19,  2.02it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2841/3000:  95%|█████████▍| 2840/3000 [46:17<01:19,  2.02it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2841/3000:  95%|█████████▍| 2841/3000 [46:18<01:18,  2.03it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2841/3000:  95%|█████████▍| 2841/3000 [46:18<01:18,  2.03it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2842/3000:  95%|█████████▍| 2841/3000 [46:18<01:18,  2.03it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2842/3000:  95%|█████████▍| 2842/3000 [46:18<01:17,  2.04it/s, v_num=1, train_loss_step=1.43e+6, 
train_loss_epoch=1.43e+6]Epoch 2842/3000:  95%|█████████▍| 2842/3000 [46:18<01:17,  2.04it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2843/3000:  95%|█████████▍| 2842/3000 [46:18<01:17,  2.04it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2843/3000:  95%|█████████▍| 2843/3000 [46:19<01:15,  2.08it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2843/3000:  95%|█████████▍| 2843/3000 [46:19<01:15,  2.08it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2844/3000:  95%|█████████▍| 2843/3000 [46:19<01:15,  2.08it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2844/3000:  95%|█████████▍| 2844/3000 [46:19<01:17,  2.02it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2844/3000:  95%|█████████▍| 2844/3000 [46:19<01:17,  2.02it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2845/3000:  95%|█████████▍| 2844/3000 [46:19<01:17,  2.02it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2845/3000:  95%|█████████▍| 2845/3000 [46:20<01:17,  2.00it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2845/3000:  95%|█████████▍| 2845/3000 [46:20<01:17,  2.00it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2846/3000:  95%|█████████▍| 2845/3000 [46:20<01:17,  2.00it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2846/3000:  95%|█████████▍| 2846/3000 [46:20<01:13,  2.10it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2846/3000:  95%|█████████▍| 2846/3000 [46:20<01:13,  2.10it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2847/3000:  95%|█████████▍| 2846/3000 [46:20<01:13,  2.10it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2847/3000:  95%|█████████▍| 2847/3000 [46:21<01:11,  2.13it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2847/3000:  95%|█████████▍| 2847/3000 
[46:21<01:11,  2.13it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2848/3000:  95%|█████████▍| 2847/3000 [46:21<01:11,  2.13it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2848/3000:  95%|█████████▍| 2848/3000 [46:21<01:12,  2.08it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2848/3000:  95%|█████████▍| 2848/3000 [46:21<01:12,  2.08it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2849/3000:  95%|█████████▍| 2848/3000 [46:21<01:12,  2.08it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2849/3000:  95%|█████████▍| 2849/3000 [46:22<01:15,  2.00it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2849/3000:  95%|█████████▍| 2849/3000 [46:22<01:15,  2.00it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2850/3000:  95%|█████████▍| 2849/3000 [46:22<01:15,  2.00it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2850/3000:  95%|█████████▌| 2850/3000 [46:22<01:13,  2.03it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2850/3000:  95%|█████████▌| 2850/3000 [46:22<01:13,  2.03it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2851/3000:  95%|█████████▌| 2850/3000 [46:22<01:13,  2.03it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2851/3000:  95%|█████████▌| 2851/3000 [46:23<01:11,  2.08it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2851/3000:  95%|█████████▌| 2851/3000 [46:23<01:11,  2.08it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2852/3000:  95%|█████████▌| 2851/3000 [46:23<01:11,  2.08it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2852/3000:  95%|█████████▌| 2852/3000 [46:23<01:11,  2.06it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2852/3000:  95%|█████████▌| 2852/3000 [46:23<01:11,  2.06it/s, v_num=1, train_loss_step=1.43e+6, 
train_loss_epoch=1.43e+6]Epoch 2853/3000:  95%|█████████▌| 2852/3000 [46:23<01:11,  2.06it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2853/3000:  95%|█████████▌| 2853/3000 [46:24<01:15,  1.95it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2853/3000:  95%|█████████▌| 2853/3000 [46:24<01:15,  1.95it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2854/3000:  95%|█████████▌| 2853/3000 [46:24<01:15,  1.95it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2854/3000:  95%|█████████▌| 2854/3000 [46:24<01:10,  2.08it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2854/3000:  95%|█████████▌| 2854/3000 [46:24<01:10,  2.08it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2855/3000:  95%|█████████▌| 2854/3000 [46:24<01:10,  2.08it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2855/3000:  95%|█████████▌| 2855/3000 [46:25<01:08,  2.12it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2855/3000:  95%|█████████▌| 2855/3000 [46:25<01:08,  2.12it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2856/3000:  95%|█████████▌| 2855/3000 [46:25<01:08,  2.12it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2856/3000:  95%|█████████▌| 2856/3000 [46:25<01:06,  2.16it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2856/3000:  95%|█████████▌| 2856/3000 [46:25<01:06,  2.16it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2857/3000:  95%|█████████▌| 2856/3000 [46:25<01:06,  2.16it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2857/3000:  95%|█████████▌| 2857/3000 [46:26<01:05,  2.17it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2857/3000:  95%|█████████▌| 2857/3000 [46:26<01:05,  2.17it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2858/3000:  95%|█████████▌| 2857/3000 
[46:26<01:05,  2.17it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2858/3000:  95%|█████████▌| 2858/3000 [46:26<01:05,  2.18it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2858/3000:  95%|█████████▌| 2858/3000 [46:26<01:05,  2.18it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2859/3000:  95%|█████████▌| 2858/3000 [46:26<01:05,  2.18it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2859/3000:  95%|█████████▌| 2859/3000 [46:27<01:09,  2.04it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2859/3000:  95%|█████████▌| 2859/3000 [46:27<01:09,  2.04it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.43e+6]Epoch 2860/3000:  95%|█████████▌| 2859/3000 [46:27<01:09,  2.04it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.43e+6]Epoch 2860/3000:  95%|█████████▌| 2860/3000 [46:27<01:06,  2.09it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.43e+6]Epoch 2860/3000:  95%|█████████▌| 2860/3000 [46:27<01:06,  2.09it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2861/3000:  95%|█████████▌| 2860/3000 [46:27<01:06,  2.09it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2861/3000:  95%|█████████▌| 2861/3000 [46:27<01:05,  2.14it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2861/3000:  95%|█████████▌| 2861/3000 [46:27<01:05,  2.14it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2862/3000:  95%|█████████▌| 2861/3000 [46:27<01:05,  2.14it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2862/3000:  95%|█████████▌| 2862/3000 [46:28<01:05,  2.10it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2862/3000:  95%|█████████▌| 2862/3000 [46:28<01:05,  2.10it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2863/3000:  95%|█████████▌| 2862/3000 [46:28<01:05,  2.10it/s, v_num=1, train_loss_step=1.4e+6, 
train_loss_epoch=1.43e+6]Epoch 2863/3000:  95%|█████████▌| 2863/3000 [46:28<01:07,  2.03it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2863/3000:  95%|█████████▌| 2863/3000 [46:28<01:07,  2.03it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2864/3000:  95%|█████████▌| 2863/3000 [46:29<01:07,  2.03it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2864/3000:  95%|█████████▌| 2864/3000 [46:29<01:05,  2.09it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2864/3000:  95%|█████████▌| 2864/3000 [46:29<01:05,  2.09it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2865/3000:  95%|█████████▌| 2864/3000 [46:29<01:05,  2.09it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2865/3000:  96%|█████████▌| 2865/3000 [46:29<01:05,  2.07it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2865/3000:  96%|█████████▌| 2865/3000 [46:29<01:05,  2.07it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2866/3000:  96%|█████████▌| 2865/3000 [46:29<01:05,  2.07it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2866/3000:  96%|█████████▌| 2866/3000 [46:30<01:05,  2.05it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2866/3000:  96%|█████████▌| 2866/3000 [46:30<01:05,  2.05it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2867/3000:  96%|█████████▌| 2866/3000 [46:30<01:05,  2.05it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2867/3000:  96%|█████████▌| 2867/3000 [46:30<01:06,  2.01it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2867/3000:  96%|█████████▌| 2867/3000 [46:30<01:06,  2.01it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2868/3000:  96%|█████████▌| 2867/3000 [46:30<01:06,  2.01it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2868/3000:  96%|█████████▌| 2868/3000 
[46:31<01:08,  1.93it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2868/3000:  96%|█████████▌| 2868/3000 [46:31<01:08,  1.93it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2869/3000:  96%|█████████▌| 2868/3000 [46:31<01:08,  1.93it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2869/3000:  96%|█████████▌| 2869/3000 [46:31<01:03,  2.07it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2869/3000:  96%|█████████▌| 2869/3000 [46:31<01:03,  2.07it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2870/3000:  96%|█████████▌| 2869/3000 [46:31<01:03,  2.07it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2870/3000:  96%|█████████▌| 2870/3000 [46:32<01:01,  2.11it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2870/3000:  96%|█████████▌| 2870/3000 [46:32<01:01,  2.11it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2871/3000:  96%|█████████▌| 2870/3000 [46:32<01:01,  2.11it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2871/3000:  96%|█████████▌| 2871/3000 [46:32<00:56,  2.27it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2871/3000:  96%|█████████▌| 2871/3000 [46:32<00:56,  2.27it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2872/3000:  96%|█████████▌| 2871/3000 [46:32<00:56,  2.27it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2872/3000:  96%|█████████▌| 2872/3000 [46:33<00:58,  2.20it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2872/3000:  96%|█████████▌| 2872/3000 [46:33<00:58,  2.20it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2873/3000:  96%|█████████▌| 2872/3000 [46:33<00:58,  2.20it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2873/3000:  96%|█████████▌| 2873/3000 [46:33<00:59,  2.12it/s, v_num=1, train_loss_step=1.43e+6, 
train_loss_epoch=1.43e+6]Epoch 2873/3000:  96%|█████████▌| 2873/3000 [46:33<00:59,  2.12it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2874/3000:  96%|█████████▌| 2873/3000 [46:33<00:59,  2.12it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2874/3000:  96%|█████████▌| 2874/3000 [46:34<00:59,  2.12it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2874/3000:  96%|█████████▌| 2874/3000 [46:34<00:59,  2.12it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2875/3000:  96%|█████████▌| 2874/3000 [46:34<00:59,  2.12it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2875/3000:  96%|█████████▌| 2875/3000 [46:34<01:03,  1.96it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2875/3000:  96%|█████████▌| 2875/3000 [46:34<01:03,  1.96it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2876/3000:  96%|█████████▌| 2875/3000 [46:34<01:03,  1.96it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2876/3000:  96%|█████████▌| 2876/3000 [46:35<01:03,  1.95it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2876/3000:  96%|█████████▌| 2876/3000 [46:35<01:03,  1.95it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2877/3000:  96%|█████████▌| 2876/3000 [46:35<01:03,  1.95it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2877/3000:  96%|█████████▌| 2877/3000 [46:35<01:04,  1.90it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2877/3000:  96%|█████████▌| 2877/3000 [46:35<01:04,  1.90it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2878/3000:  96%|█████████▌| 2877/3000 [46:35<01:04,  1.90it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2878/3000:  96%|█████████▌| 2878/3000 [46:36<01:02,  1.96it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2878/3000:  96%|█████████▌| 2878/3000 [46:36<01:02,  
1.96it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2879/3000:  96%|█████████▌| 2878/3000 [46:36<01:02,  1.96it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2879/3000:  96%|█████████▌| 2879/3000 [46:36<00:58,  2.07it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2879/3000:  96%|█████████▌| 2879/3000 [46:36<00:58,  2.07it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2880/3000:  96%|█████████▌| 2879/3000 [46:36<00:58,  2.07it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2880/3000:  96%|█████████▌| 2880/3000 [46:37<00:56,  2.11it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2880/3000:  96%|█████████▌| 2880/3000 [46:37<00:56,  2.11it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2881/3000:  96%|█████████▌| 2880/3000 [46:37<00:56,  2.11it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2881/3000:  96%|█████████▌| 2881/3000 [46:37<00:58,  2.04it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2881/3000:  96%|█████████▌| 2881/3000 [46:37<00:58,  2.04it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2882/3000:  96%|█████████▌| 2881/3000 [46:37<00:58,  2.04it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2882/3000:  96%|█████████▌| 2882/3000 [46:38<00:57,  2.06it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2882/3000:  96%|█████████▌| 2882/3000 [46:38<00:57,  2.06it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2883/3000:  96%|█████████▌| 2882/3000 [46:38<00:57,  2.06it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2883/3000:  96%|█████████▌| 2883/3000 [46:38<00:57,  2.04it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2883/3000:  96%|█████████▌| 2883/3000 [46:38<00:57,  2.04it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 
2884/3000:  96%|█████████▌| 2883/3000 [46:38<00:57,  2.04it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2884/3000:  96%|█████████▌| 2884/3000 [46:39<00:51,  2.24it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2884/3000:  96%|█████████▌| 2884/3000 [46:39<00:51,  2.24it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2885/3000:  96%|█████████▌| 2884/3000 [46:39<00:51,  2.24it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2885/3000:  96%|█████████▌| 2885/3000 [46:39<00:50,  2.27it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2885/3000:  96%|█████████▌| 2885/3000 [46:39<00:50,  2.27it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2886/3000:  96%|█████████▌| 2885/3000 [46:39<00:50,  2.27it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2886/3000:  96%|█████████▌| 2886/3000 [46:39<00:51,  2.22it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2886/3000:  96%|█████████▌| 2886/3000 [46:39<00:51,  2.22it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2887/3000:  96%|█████████▌| 2886/3000 [46:39<00:51,  2.22it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2887/3000:  96%|█████████▌| 2887/3000 [46:40<00:50,  2.26it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2887/3000:  96%|█████████▌| 2887/3000 [46:40<00:50,  2.26it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2888/3000:  96%|█████████▌| 2887/3000 [46:40<00:50,  2.26it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2888/3000:  96%|█████████▋| 2888/3000 [46:40<00:51,  2.16it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2888/3000:  96%|█████████▋| 2888/3000 [46:40<00:51,  2.16it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2889/3000:  96%|█████████▋| 2888/3000 [46:40<00:51,  2.16it/s, v_num=1, 
train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2889/3000:  96%|█████████▋| 2889/3000 [46:41<00:53,  2.09it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2889/3000:  96%|█████████▋| 2889/3000 [46:41<00:53,  2.09it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2890/3000:  96%|█████████▋| 2889/3000 [46:41<00:53,  2.09it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2890/3000:  96%|█████████▋| 2890/3000 [46:41<00:53,  2.07it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2890/3000:  96%|█████████▋| 2890/3000 [46:41<00:53,  2.07it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2891/3000:  96%|█████████▋| 2890/3000 [46:41<00:53,  2.07it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2891/3000:  96%|█████████▋| 2891/3000 [46:42<00:49,  2.21it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2891/3000:  96%|█████████▋| 2891/3000 [46:42<00:49,  2.21it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2892/3000:  96%|█████████▋| 2891/3000 [46:42<00:49,  2.21it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2892/3000:  96%|█████████▋| 2892/3000 [46:42<00:49,  2.16it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2892/3000:  96%|█████████▋| 2892/3000 [46:42<00:49,  2.16it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2893/3000:  96%|█████████▋| 2892/3000 [46:42<00:49,  2.16it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2893/3000:  96%|█████████▋| 2893/3000 [46:43<00:49,  2.18it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2893/3000:  96%|█████████▋| 2893/3000 [46:43<00:49,  2.18it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2894/3000:  96%|█████████▋| 2893/3000 [46:43<00:49,  2.18it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2894/3000:  
96%|█████████▋| 2894/3000 [46:43<00:49,  2.15it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2894/3000:  96%|█████████▋| 2894/3000 [46:43<00:49,  2.15it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2895/3000:  96%|█████████▋| 2894/3000 [46:43<00:49,  2.15it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2895/3000:  96%|█████████▋| 2895/3000 [46:44<00:50,  2.08it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2895/3000:  96%|█████████▋| 2895/3000 [46:44<00:50,  2.08it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2896/3000:  96%|█████████▋| 2895/3000 [46:44<00:50,  2.08it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2896/3000:  97%|█████████▋| 2896/3000 [46:44<00:51,  2.01it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2896/3000:  97%|█████████▋| 2896/3000 [46:44<00:51,  2.01it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2897/3000:  97%|█████████▋| 2896/3000 [46:44<00:51,  2.01it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2897/3000:  97%|█████████▋| 2897/3000 [46:45<00:52,  1.97it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2897/3000:  97%|█████████▋| 2897/3000 [46:45<00:52,  1.97it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2898/3000:  97%|█████████▋| 2897/3000 [46:45<00:52,  1.97it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2898/3000:  97%|█████████▋| 2898/3000 [46:45<00:51,  1.99it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2898/3000:  97%|█████████▋| 2898/3000 [46:45<00:51,  1.99it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2899/3000:  97%|█████████▋| 2898/3000 [46:45<00:51,  1.99it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2899/3000:  97%|█████████▋| 2899/3000 [46:46<00:49,  2.02it/s, v_num=1, 
train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2899/3000:  97%|█████████▋| 2899/3000 [46:46<00:49,  2.02it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2900/3000:  97%|█████████▋| 2899/3000 [46:46<00:49,  2.02it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2900/3000:  97%|█████████▋| 2900/3000 [46:46<00:48,  2.06it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2900/3000:  97%|█████████▋| 2900/3000 [46:46<00:48,  2.06it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2901/3000:  97%|█████████▋| 2900/3000 [46:46<00:48,  2.06it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2901/3000:  97%|█████████▋| 2901/3000 [46:47<00:50,  1.98it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2901/3000:  97%|█████████▋| 2901/3000 [46:47<00:50,  1.98it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2902/3000:  97%|█████████▋| 2901/3000 [46:47<00:50,  1.98it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2902/3000:  97%|█████████▋| 2902/3000 [46:47<00:51,  1.92it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2902/3000:  97%|█████████▋| 2902/3000 [46:47<00:51,  1.92it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2903/3000:  97%|█████████▋| 2902/3000 [46:47<00:51,  1.92it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2903/3000:  97%|█████████▋| 2903/3000 [46:48<00:50,  1.92it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2903/3000:  97%|█████████▋| 2903/3000 [46:48<00:50,  1.92it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2904/3000:  97%|█████████▋| 2903/3000 [46:48<00:50,  1.92it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2904/3000:  97%|█████████▋| 2904/3000 [46:48<00:47,  2.02it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2904/3000:  
97%|█████████▋| 2904/3000 [46:48<00:47,  2.02it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2905/3000:  97%|█████████▋| 2904/3000 [46:48<00:47,  2.02it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2905/3000:  97%|█████████▋| 2905/3000 [46:49<00:45,  2.09it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2905/3000:  97%|█████████▋| 2905/3000 [46:49<00:45,  2.09it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2906/3000:  97%|█████████▋| 2905/3000 [46:49<00:45,  2.09it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2906/3000:  97%|█████████▋| 2906/3000 [46:49<00:43,  2.17it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2906/3000:  97%|█████████▋| 2906/3000 [46:49<00:43,  2.17it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2907/3000:  97%|█████████▋| 2906/3000 [46:49<00:43,  2.17it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2907/3000:  97%|█████████▋| 2907/3000 [46:50<00:40,  2.29it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2907/3000:  97%|█████████▋| 2907/3000 [46:50<00:40,  2.29it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2908/3000:  97%|█████████▋| 2907/3000 [46:50<00:40,  2.29it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2908/3000:  97%|█████████▋| 2908/3000 [46:50<00:41,  2.24it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2908/3000:  97%|█████████▋| 2908/3000 [46:50<00:41,  2.24it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2909/3000:  97%|█████████▋| 2908/3000 [46:50<00:41,  2.24it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2909/3000:  97%|█████████▋| 2909/3000 [46:51<00:42,  2.15it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2909/3000:  97%|█████████▋| 2909/3000 [46:51<00:42,  2.15it/s, v_num=1, 
train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2910/3000:  97%|█████████▋| 2909/3000 [46:51<00:42,  2.15it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2910/3000:  97%|█████████▋| 2910/3000 [46:51<00:41,  2.18it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2910/3000:  97%|█████████▋| 2910/3000 [46:51<00:41,  2.18it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2911/3000:  97%|█████████▋| 2910/3000 [46:51<00:41,  2.18it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2911/3000:  97%|█████████▋| 2911/3000 [46:51<00:40,  2.22it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2911/3000:  97%|█████████▋| 2911/3000 [46:51<00:40,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2912/3000:  97%|█████████▋| 2911/3000 [46:51<00:40,  2.22it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2912/3000:  97%|█████████▋| 2912/3000 [46:52<00:39,  2.23it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2912/3000:  97%|█████████▋| 2912/3000 [46:52<00:39,  2.23it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2913/3000:  97%|█████████▋| 2912/3000 [46:52<00:39,  2.23it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2913/3000:  97%|█████████▋| 2913/3000 [46:52<00:37,  2.33it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2913/3000:  97%|█████████▋| 2913/3000 [46:52<00:37,  2.33it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2914/3000:  97%|█████████▋| 2913/3000 [46:52<00:37,  2.33it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2914/3000:  97%|█████████▋| 2914/3000 [46:53<00:36,  2.34it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2914/3000:  97%|█████████▋| 2914/3000 [46:53<00:36,  2.34it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2915/3000:  
97%|█████████▋| 2914/3000 [46:53<00:36,  2.34it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2915/3000:  97%|█████████▋| 2915/3000 [46:53<00:39,  2.15it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2915/3000:  97%|█████████▋| 2915/3000 [46:53<00:39,  2.15it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2916/3000:  97%|█████████▋| 2915/3000 [46:53<00:39,  2.15it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2916/3000:  97%|█████████▋| 2916/3000 [46:54<00:39,  2.14it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2916/3000:  97%|█████████▋| 2916/3000 [46:54<00:39,  2.14it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2917/3000:  97%|█████████▋| 2916/3000 [46:54<00:39,  2.14it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2917/3000:  97%|█████████▋| 2917/3000 [46:54<00:40,  2.07it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2917/3000:  97%|█████████▋| 2917/3000 [46:54<00:40,  2.07it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2918/3000:  97%|█████████▋| 2917/3000 [46:54<00:40,  2.07it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2918/3000:  97%|█████████▋| 2918/3000 [46:55<00:38,  2.12it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2918/3000:  97%|█████████▋| 2918/3000 [46:55<00:38,  2.12it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2919/3000:  97%|█████████▋| 2918/3000 [46:55<00:38,  2.12it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2919/3000:  97%|█████████▋| 2919/3000 [46:55<00:37,  2.18it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2919/3000:  97%|█████████▋| 2919/3000 [46:55<00:37,  2.18it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2920/3000:  97%|█████████▋| 2919/3000 [46:55<00:37,  2.18it/s, v_num=1, 
train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2920/3000:  97%|█████████▋| 2920/3000 [46:55<00:32,  2.46it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2920/3000:  97%|█████████▋| 2920/3000 [46:55<00:32,  2.46it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2921/3000:  97%|█████████▋| 2920/3000 [46:55<00:32,  2.46it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2921/3000:  97%|█████████▋| 2921/3000 [46:56<00:34,  2.31it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2921/3000:  97%|█████████▋| 2921/3000 [46:56<00:34,  2.31it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2922/3000:  97%|█████████▋| 2921/3000 [46:56<00:34,  2.31it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2922/3000:  97%|█████████▋| 2922/3000 [46:56<00:35,  2.20it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2922/3000:  97%|█████████▋| 2922/3000 [46:56<00:35,  2.20it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2923/3000:  97%|█████████▋| 2922/3000 [46:56<00:35,  2.20it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2923/3000:  97%|█████████▋| 2923/3000 [46:57<00:34,  2.25it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2923/3000:  97%|█████████▋| 2923/3000 [46:57<00:34,  2.25it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2924/3000:  97%|█████████▋| 2923/3000 [46:57<00:34,  2.25it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2924/3000:  97%|█████████▋| 2924/3000 [46:57<00:33,  2.29it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2924/3000:  97%|█████████▋| 2924/3000 [46:57<00:33,  2.29it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2925/3000:  97%|█████████▋| 2924/3000 [46:57<00:33,  2.29it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2925/3000:  
98%|█████████▊| 2925/3000 [46:58<00:31,  2.36it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2925/3000:  98%|█████████▊| 2925/3000 [46:58<00:31,  2.36it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2926/3000:  98%|█████████▊| 2925/3000 [46:58<00:31,  2.36it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2926/3000:  98%|█████████▊| 2926/3000 [46:58<00:31,  2.32it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2926/3000:  98%|█████████▊| 2926/3000 [46:58<00:31,  2.32it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2927/3000:  98%|█████████▊| 2926/3000 [46:58<00:31,  2.32it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2927/3000:  98%|█████████▊| 2927/3000 [46:58<00:31,  2.33it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2927/3000:  98%|█████████▊| 2927/3000 [46:58<00:31,  2.33it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2928/3000:  98%|█████████▊| 2927/3000 [46:58<00:31,  2.33it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2928/3000:  98%|█████████▊| 2928/3000 [46:59<00:30,  2.33it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2928/3000:  98%|█████████▊| 2928/3000 [46:59<00:30,  2.33it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2929/3000:  98%|█████████▊| 2928/3000 [46:59<00:30,  2.33it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2929/3000:  98%|█████████▊| 2929/3000 [46:59<00:31,  2.28it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2929/3000:  98%|█████████▊| 2929/3000 [46:59<00:31,  2.28it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2930/3000:  98%|█████████▊| 2929/3000 [46:59<00:31,  2.28it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2930/3000:  98%|█████████▊| 2930/3000 [47:00<00:30,  2.26it/s, v_num=1, train_loss_step=1.4e+6, 
train_loss_epoch=1.43e+6]Epoch 2930/3000:  98%|█████████▊| 2930/3000 [47:00<00:30,  2.26it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2931/3000:  98%|█████████▊| 2930/3000 [47:00<00:30,  2.26it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2931/3000:  98%|█████████▊| 2931/3000 [47:00<00:30,  2.26it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2931/3000:  98%|█████████▊| 2931/3000 [47:00<00:30,  2.26it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2932/3000:  98%|█████████▊| 2931/3000 [47:00<00:30,  2.26it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2932/3000:  98%|█████████▊| 2932/3000 [47:01<00:29,  2.29it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2932/3000:  98%|█████████▊| 2932/3000 [47:01<00:29,  2.29it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2933/3000:  98%|█████████▊| 2932/3000 [47:01<00:29,  2.29it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2933/3000:  98%|█████████▊| 2933/3000 [47:01<00:29,  2.28it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2933/3000:  98%|█████████▊| 2933/3000 [47:01<00:29,  2.28it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2934/3000:  98%|█████████▊| 2933/3000 [47:01<00:29,  2.28it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2934/3000:  98%|█████████▊| 2934/3000 [47:01<00:27,  2.39it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2934/3000:  98%|█████████▊| 2934/3000 [47:01<00:27,  2.39it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2935/3000:  98%|█████████▊| 2934/3000 [47:01<00:27,  2.39it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2935/3000:  98%|█████████▊| 2935/3000 [47:02<00:26,  2.47it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2935/3000:  98%|█████████▊| 2935/3000 [47:02<00:26,  
2.47it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2936/3000:  98%|█████████▊| 2935/3000 [47:02<00:26,  2.47it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2936/3000:  98%|█████████▊| 2936/3000 [47:02<00:27,  2.30it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2936/3000:  98%|█████████▊| 2936/3000 [47:02<00:27,  2.30it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2937/3000:  98%|█████████▊| 2936/3000 [47:02<00:27,  2.30it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2937/3000:  98%|█████████▊| 2937/3000 [47:03<00:28,  2.25it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2937/3000:  98%|█████████▊| 2937/3000 [47:03<00:28,  2.25it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2938/3000:  98%|█████████▊| 2937/3000 [47:03<00:28,  2.25it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2938/3000:  98%|█████████▊| 2938/3000 [47:03<00:28,  2.17it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2938/3000:  98%|█████████▊| 2938/3000 [47:03<00:28,  2.17it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2939/3000:  98%|█████████▊| 2938/3000 [47:03<00:28,  2.17it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2939/3000:  98%|█████████▊| 2939/3000 [47:04<00:27,  2.18it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2939/3000:  98%|█████████▊| 2939/3000 [47:04<00:27,  2.18it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2940/3000:  98%|█████████▊| 2939/3000 [47:04<00:27,  2.18it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2940/3000:  98%|█████████▊| 2940/3000 [47:04<00:26,  2.26it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2940/3000:  98%|█████████▊| 2940/3000 [47:04<00:26,  2.26it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 
2941/3000:  98%|█████████▊| 2940/3000 [47:04<00:26,  2.26it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2941/3000:  98%|█████████▊| 2941/3000 [47:05<00:25,  2.31it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2941/3000:  98%|█████████▊| 2941/3000 [47:05<00:25,  2.31it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2942/3000:  98%|█████████▊| 2941/3000 [47:05<00:25,  2.31it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2942/3000:  98%|█████████▊| 2942/3000 [47:05<00:25,  2.25it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2942/3000:  98%|█████████▊| 2942/3000 [47:05<00:25,  2.25it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2943/3000:  98%|█████████▊| 2942/3000 [47:05<00:25,  2.25it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2943/3000:  98%|█████████▊| 2943/3000 [47:05<00:24,  2.35it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2943/3000:  98%|█████████▊| 2943/3000 [47:05<00:24,  2.35it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2944/3000:  98%|█████████▊| 2943/3000 [47:05<00:24,  2.35it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2944/3000:  98%|█████████▊| 2944/3000 [47:06<00:24,  2.32it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2944/3000:  98%|█████████▊| 2944/3000 [47:06<00:24,  2.32it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2945/3000:  98%|█████████▊| 2944/3000 [47:06<00:24,  2.32it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2945/3000:  98%|█████████▊| 2945/3000 [47:06<00:24,  2.23it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2945/3000:  98%|█████████▊| 2945/3000 [47:06<00:24,  2.23it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.43e+6]Epoch 2946/3000:  98%|█████████▊| 2945/3000 [47:06<00:24,  2.23it/s, v_num=1, 
train_loss_step=1.48e+6, train_loss_epoch=1.43e+6]Epoch 2946/3000:  98%|█████████▊| 2946/3000 [47:07<00:23,  2.32it/s, v_num=1, train_loss_step=1.48e+6, train_loss_epoch=1.43e+6]Epoch 2946/3000:  98%|█████████▊| 2946/3000 [47:07<00:23,  2.32it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2947/3000:  98%|█████████▊| 2946/3000 [47:07<00:23,  2.32it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2947/3000:  98%|█████████▊| 2947/3000 [47:07<00:22,  2.33it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2947/3000:  98%|█████████▊| 2947/3000 [47:07<00:22,  2.33it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2948/3000:  98%|█████████▊| 2947/3000 [47:07<00:22,  2.33it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2948/3000:  98%|█████████▊| 2948/3000 [47:08<00:23,  2.22it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2948/3000:  98%|█████████▊| 2948/3000 [47:08<00:23,  2.22it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2949/3000:  98%|█████████▊| 2948/3000 [47:08<00:23,  2.22it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2949/3000:  98%|█████████▊| 2949/3000 [47:08<00:24,  2.08it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2949/3000:  98%|█████████▊| 2949/3000 [47:08<00:24,  2.08it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2950/3000:  98%|█████████▊| 2949/3000 [47:08<00:24,  2.08it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2950/3000:  98%|█████████▊| 2950/3000 [47:09<00:23,  2.11it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2950/3000:  98%|█████████▊| 2950/3000 [47:09<00:23,  2.11it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2951/3000:  98%|█████████▊| 2950/3000 [47:09<00:23,  2.11it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2951/3000:  
98%|█████████▊| 2951/3000 [47:09<00:22,  2.15it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2951/3000:  98%|█████████▊| 2951/3000 [47:09<00:22,  2.15it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2952/3000:  98%|█████████▊| 2951/3000 [47:09<00:22,  2.15it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2952/3000:  98%|█████████▊| 2952/3000 [47:10<00:22,  2.15it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2952/3000:  98%|█████████▊| 2952/3000 [47:10<00:22,  2.15it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2953/3000:  98%|█████████▊| 2952/3000 [47:10<00:22,  2.15it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2953/3000:  98%|█████████▊| 2953/3000 [47:10<00:21,  2.23it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2953/3000:  98%|█████████▊| 2953/3000 [47:10<00:21,  2.23it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2954/3000:  98%|█████████▊| 2953/3000 [47:10<00:21,  2.23it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2954/3000:  98%|█████████▊| 2954/3000 [47:10<00:19,  2.31it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2954/3000:  98%|█████████▊| 2954/3000 [47:10<00:19,  2.31it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2955/3000:  98%|█████████▊| 2954/3000 [47:10<00:19,  2.31it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2955/3000:  98%|█████████▊| 2955/3000 [47:11<00:20,  2.22it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2955/3000:  98%|█████████▊| 2955/3000 [47:11<00:20,  2.22it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2956/3000:  98%|█████████▊| 2955/3000 [47:11<00:20,  2.22it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2956/3000:  99%|█████████▊| 2956/3000 [47:11<00:20,  2.15it/s, v_num=1, 
train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2956/3000:  99%|█████████▊| 2956/3000 [47:11<00:20,  2.15it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.43e+6]Epoch 2957/3000:  99%|█████████▊| 2956/3000 [47:11<00:20,  2.15it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.43e+6]Epoch 2957/3000:  99%|█████████▊| 2957/3000 [47:12<00:19,  2.24it/s, v_num=1, train_loss_step=1.49e+6, train_loss_epoch=1.43e+6]Epoch 2957/3000:  99%|█████████▊| 2957/3000 [47:12<00:19,  2.24it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2958/3000:  99%|█████████▊| 2957/3000 [47:12<00:19,  2.24it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2958/3000:  99%|█████████▊| 2958/3000 [47:12<00:18,  2.24it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2958/3000:  99%|█████████▊| 2958/3000 [47:12<00:18,  2.24it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2959/3000:  99%|█████████▊| 2958/3000 [47:12<00:18,  2.24it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2959/3000:  99%|█████████▊| 2959/3000 [47:13<00:18,  2.19it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2959/3000:  99%|█████████▊| 2959/3000 [47:13<00:18,  2.19it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2960/3000:  99%|█████████▊| 2959/3000 [47:13<00:18,  2.19it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2960/3000:  99%|█████████▊| 2960/3000 [47:13<00:18,  2.19it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2960/3000:  99%|█████████▊| 2960/3000 [47:13<00:18,  2.19it/s, v_num=1, train_loss_step=1.38e+6, train_loss_epoch=1.43e+6]Epoch 2961/3000:  99%|█████████▊| 2960/3000 [47:13<00:18,  2.19it/s, v_num=1, train_loss_step=1.38e+6, train_loss_epoch=1.43e+6]Epoch 2961/3000:  99%|█████████▊| 2961/3000 [47:14<00:17,  2.24it/s, v_num=1, train_loss_step=1.38e+6, train_loss_epoch=1.43e+6]Epoch 2961/3000:  99%|█████████▊| 
2961/3000 [47:14<00:17,  2.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2962/3000:  99%|█████████▊| 2961/3000 [47:14<00:17,  2.24it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2962/3000:  99%|█████████▊| 2962/3000 [47:14<00:17,  2.20it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2962/3000:  99%|█████████▊| 2962/3000 [47:14<00:17,  2.20it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2963/3000:  99%|█████████▊| 2962/3000 [47:14<00:17,  2.20it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2963/3000:  99%|█████████▉| 2963/3000 [47:15<00:16,  2.25it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2963/3000:  99%|█████████▉| 2963/3000 [47:15<00:16,  2.25it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2964/3000:  99%|█████████▉| 2963/3000 [47:15<00:16,  2.25it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2964/3000:  99%|█████████▉| 2964/3000 [47:15<00:16,  2.21it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2964/3000:  99%|█████████▉| 2964/3000 [47:15<00:16,  2.21it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2965/3000:  99%|█████████▉| 2964/3000 [47:15<00:16,  2.21it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2965/3000:  99%|█████████▉| 2965/3000 [47:15<00:15,  2.19it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2965/3000:  99%|█████████▉| 2965/3000 [47:15<00:15,  2.19it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2966/3000:  99%|█████████▉| 2965/3000 [47:15<00:15,  2.19it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2966/3000:  99%|█████████▉| 2966/3000 [47:16<00:15,  2.19it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2966/3000:  99%|█████████▉| 2966/3000 [47:16<00:15,  2.19it/s, v_num=1, train_loss_step=1.44e+6, 
train_loss_epoch=1.43e+6]Epoch 2967/3000:  99%|█████████▉| 2966/3000 [47:16<00:15,  2.19it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2967/3000:  99%|█████████▉| 2967/3000 [47:16<00:15,  2.12it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2967/3000:  99%|█████████▉| 2967/3000 [47:16<00:15,  2.12it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2968/3000:  99%|█████████▉| 2967/3000 [47:16<00:15,  2.12it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2968/3000:  99%|█████████▉| 2968/3000 [47:17<00:15,  2.11it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2968/3000:  99%|█████████▉| 2968/3000 [47:17<00:15,  2.11it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2969/3000:  99%|█████████▉| 2968/3000 [47:17<00:15,  2.11it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2969/3000:  99%|█████████▉| 2969/3000 [47:17<00:14,  2.11it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2969/3000:  99%|█████████▉| 2969/3000 [47:17<00:14,  2.11it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2970/3000:  99%|█████████▉| 2969/3000 [47:17<00:14,  2.11it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2970/3000:  99%|█████████▉| 2970/3000 [47:18<00:13,  2.15it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2970/3000:  99%|█████████▉| 2970/3000 [47:18<00:13,  2.15it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2971/3000:  99%|█████████▉| 2970/3000 [47:18<00:13,  2.15it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2971/3000:  99%|█████████▉| 2971/3000 [47:18<00:13,  2.10it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2971/3000:  99%|█████████▉| 2971/3000 [47:18<00:13,  2.10it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2972/3000:  99%|█████████▉| 2971/3000 
[47:18<00:13,  2.10it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2972/3000:  99%|█████████▉| 2972/3000 [47:19<00:13,  2.13it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2972/3000:  99%|█████████▉| 2972/3000 [47:19<00:13,  2.13it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2973/3000:  99%|█████████▉| 2972/3000 [47:19<00:13,  2.13it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2973/3000:  99%|█████████▉| 2973/3000 [47:19<00:12,  2.14it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2973/3000:  99%|█████████▉| 2973/3000 [47:19<00:12,  2.14it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2974/3000:  99%|█████████▉| 2973/3000 [47:19<00:12,  2.14it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2974/3000:  99%|█████████▉| 2974/3000 [47:20<00:11,  2.18it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2974/3000:  99%|█████████▉| 2974/3000 [47:20<00:11,  2.18it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2975/3000:  99%|█████████▉| 2974/3000 [47:20<00:11,  2.18it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2975/3000:  99%|█████████▉| 2975/3000 [47:20<00:11,  2.15it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2975/3000:  99%|█████████▉| 2975/3000 [47:20<00:11,  2.15it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2976/3000:  99%|█████████▉| 2975/3000 [47:20<00:11,  2.15it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2976/3000:  99%|█████████▉| 2976/3000 [47:21<00:10,  2.23it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2976/3000:  99%|█████████▉| 2976/3000 [47:21<00:10,  2.23it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2977/3000:  99%|█████████▉| 2976/3000 [47:21<00:10,  2.23it/s, v_num=1, train_loss_step=1.44e+6, 
train_loss_epoch=1.43e+6]Epoch 2977/3000:  99%|█████████▉| 2977/3000 [47:21<00:10,  2.14it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2977/3000:  99%|█████████▉| 2977/3000 [47:21<00:10,  2.14it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2978/3000:  99%|█████████▉| 2977/3000 [47:21<00:10,  2.14it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2978/3000:  99%|█████████▉| 2978/3000 [47:21<00:09,  2.26it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2978/3000:  99%|█████████▉| 2978/3000 [47:21<00:09,  2.26it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2979/3000:  99%|█████████▉| 2978/3000 [47:21<00:09,  2.26it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2979/3000:  99%|█████████▉| 2979/3000 [47:22<00:08,  2.37it/s, v_num=1, train_loss_step=1.47e+6, train_loss_epoch=1.43e+6]Epoch 2979/3000:  99%|█████████▉| 2979/3000 [47:22<00:08,  2.37it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2980/3000:  99%|█████████▉| 2979/3000 [47:22<00:08,  2.37it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2980/3000:  99%|█████████▉| 2980/3000 [47:22<00:08,  2.30it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2980/3000:  99%|█████████▉| 2980/3000 [47:22<00:08,  2.30it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2981/3000:  99%|█████████▉| 2980/3000 [47:22<00:08,  2.30it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2981/3000:  99%|█████████▉| 2981/3000 [47:23<00:08,  2.19it/s, v_num=1, train_loss_step=1.44e+6, train_loss_epoch=1.43e+6]Epoch 2981/3000:  99%|█████████▉| 2981/3000 [47:23<00:08,  2.19it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2982/3000:  99%|█████████▉| 2981/3000 [47:23<00:08,  2.19it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2982/3000:  99%|█████████▉| 2982/3000 
[47:23<00:08,  2.14it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2982/3000:  99%|█████████▉| 2982/3000 [47:23<00:08,  2.14it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6] Epoch 2983/3000:  99%|█████████▉| 2982/3000 [47:23<00:08,  2.14it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2983/3000:  99%|█████████▉| 2983/3000 [47:24<00:07,  2.28it/s, v_num=1, train_loss_step=1.4e+6, train_loss_epoch=1.43e+6]Epoch 2983/3000:  99%|█████████▉| 2983/3000 [47:24<00:07,  2.28it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2984/3000:  99%|█████████▉| 2983/3000 [47:24<00:07,  2.28it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2984/3000:  99%|█████████▉| 2984/3000 [47:24<00:07,  2.16it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2984/3000:  99%|█████████▉| 2984/3000 [47:24<00:07,  2.16it/s, v_num=1, train_loss_step=1.38e+6, train_loss_epoch=1.43e+6]Epoch 2985/3000:  99%|█████████▉| 2984/3000 [47:24<00:07,  2.16it/s, v_num=1, train_loss_step=1.38e+6, train_loss_epoch=1.43e+6]Epoch 2985/3000: 100%|█████████▉| 2985/3000 [47:25<00:06,  2.15it/s, v_num=1, train_loss_step=1.38e+6, train_loss_epoch=1.43e+6]Epoch 2985/3000: 100%|█████████▉| 2985/3000 [47:25<00:06,  2.15it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2986/3000: 100%|█████████▉| 2985/3000 [47:25<00:06,  2.15it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2986/3000: 100%|█████████▉| 2986/3000 [47:25<00:06,  2.10it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2986/3000: 100%|█████████▉| 2986/3000 [47:25<00:06,  2.10it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2987/3000: 100%|█████████▉| 2986/3000 [47:25<00:06,  2.10it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2987/3000: 100%|█████████▉| 2987/3000 [47:26<00:05,  2.19it/s, v_num=1, train_loss_step=1.43e+6, 
train_loss_epoch=1.43e+6]Epoch 2987/3000: 100%|█████████▉| 2987/3000 [47:26<00:05,  2.19it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2988/3000: 100%|█████████▉| 2987/3000 [47:26<00:05,  2.19it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2988/3000: 100%|█████████▉| 2988/3000 [47:26<00:05,  2.28it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2988/3000: 100%|█████████▉| 2988/3000 [47:26<00:05,  2.28it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2989/3000: 100%|█████████▉| 2988/3000 [47:26<00:05,  2.28it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2989/3000: 100%|█████████▉| 2989/3000 [47:26<00:04,  2.25it/s, v_num=1, train_loss_step=1.46e+6, train_loss_epoch=1.43e+6]Epoch 2989/3000: 100%|█████████▉| 2989/3000 [47:26<00:04,  2.25it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2990/3000: 100%|█████████▉| 2989/3000 [47:26<00:04,  2.25it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2990/3000: 100%|█████████▉| 2990/3000 [47:27<00:04,  2.47it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2990/3000: 100%|█████████▉| 2990/3000 [47:27<00:04,  2.47it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2991/3000: 100%|█████████▉| 2990/3000 [47:27<00:04,  2.47it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2991/3000: 100%|█████████▉| 2991/3000 [47:27<00:03,  2.38it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2991/3000: 100%|█████████▉| 2991/3000 [47:27<00:03,  2.38it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2992/3000: 100%|█████████▉| 2991/3000 [47:27<00:03,  2.38it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2992/3000: 100%|█████████▉| 2992/3000 [47:28<00:03,  2.37it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2992/3000: 100%|█████████▉| 2992/3000 
[47:28<00:03,  2.37it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2993/3000: 100%|█████████▉| 2992/3000 [47:28<00:03,  2.37it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2993/3000: 100%|█████████▉| 2993/3000 [47:28<00:03,  2.27it/s, v_num=1, train_loss_step=1.41e+6, train_loss_epoch=1.43e+6]Epoch 2993/3000: 100%|█████████▉| 2993/3000 [47:28<00:03,  2.27it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2994/3000: 100%|█████████▉| 2993/3000 [47:28<00:03,  2.27it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2994/3000: 100%|█████████▉| 2994/3000 [47:28<00:02,  2.36it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2994/3000: 100%|█████████▉| 2994/3000 [47:28<00:02,  2.36it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2995/3000: 100%|█████████▉| 2994/3000 [47:28<00:02,  2.36it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2995/3000: 100%|█████████▉| 2995/3000 [47:29<00:02,  2.21it/s, v_num=1, train_loss_step=1.45e+6, train_loss_epoch=1.43e+6]Epoch 2995/3000: 100%|█████████▉| 2995/3000 [47:29<00:02,  2.21it/s, v_num=1, train_loss_step=1.38e+6, train_loss_epoch=1.43e+6]Epoch 2996/3000: 100%|█████████▉| 2995/3000 [47:29<00:02,  2.21it/s, v_num=1, train_loss_step=1.38e+6, train_loss_epoch=1.43e+6]Epoch 2996/3000: 100%|█████████▉| 2996/3000 [47:29<00:01,  2.36it/s, v_num=1, train_loss_step=1.38e+6, train_loss_epoch=1.43e+6]Epoch 2996/3000: 100%|█████████▉| 2996/3000 [47:29<00:01,  2.36it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2997/3000: 100%|█████████▉| 2996/3000 [47:29<00:01,  2.36it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2997/3000: 100%|█████████▉| 2997/3000 [47:30<00:01,  2.56it/s, v_num=1, train_loss_step=1.39e+6, train_loss_epoch=1.43e+6]Epoch 2997/3000: 100%|█████████▉| 2997/3000 [47:30<00:01,  2.56it/s, v_num=1, train_loss_step=1.43e+6, 
train_loss_epoch=1.43e+6]Epoch 2998/3000: 100%|█████████▉| 2997/3000 [47:30<00:01,  2.56it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2998/3000: 100%|█████████▉| 2998/3000 [47:30<00:00,  2.47it/s, v_num=1, train_loss_step=1.43e+6, train_loss_epoch=1.43e+6]Epoch 2998/3000: 100%|█████████▉| 2998/3000 [47:30<00:00,  2.47it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2999/3000: 100%|█████████▉| 2998/3000 [47:30<00:00,  2.47it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2999/3000: 100%|█████████▉| 2999/3000 [47:31<00:00,  2.38it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 2999/3000: 100%|█████████▉| 2999/3000 [47:31<00:00,  2.38it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 3000/3000: 100%|█████████▉| 2999/3000 [47:31<00:00,  2.38it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 3000/3000: 100%|██████████| 3000/3000 [47:31<00:00,  2.31it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 3000/3000: 100%|██████████| 3000/3000 [47:31<00:00,  2.31it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]Epoch 3000/3000: 100%|██████████| 3000/3000 [47:31<00:00,  1.05it/s, v_num=1, train_loss_step=1.42e+6, train_loss_epoch=1.43e+6]
    +
    +
    +
    +
    +

    +
    +
    +

    Get the results from the model, also put them in the .obs slot.

    -
    # this chunk has issues and therefore not evaluated
    -
    -st_adata.obsm["deconvolution"] = spatial_model.get_proportions()
    -
    -# also copy to the obsm data frame
    -for ct in st_adata.obsm["deconvolution"].columns:
    -    st_adata.obs[ct] = st_adata.obsm["deconvolution"][ct]
    +
    st_adata.obsm["deconvolution"] = spatial_model.get_proportions()
    +
    +# also copy to the .obs data frame
    +for ct in st_adata.obsm["deconvolution"].columns:
    +    st_adata.obs[ct] = st_adata.obsm["deconvolution"][ct]

    We are then able to explore how cell types in the scRNA-seq dataset are predicted onto the visium dataset. Let’s first visualize the neurons from the cortical layers.

    -
    # this chunk has issues and therefore not evaluated
    -
    -sc.pl.spatial(
    -    st_adata,
    -    img_key="hires",
    -    color=["L2/3 IT", "L4", "L5 PT", "L6 CT"],
    -    library_id=lib_a,
    -    size=1.5,
    -    ncols=2
    -)
    +
    sc.pl.spatial(
    +    st_adata,
    +    img_key="hires",
    +    color=["L2/3 IT", "L4", "L5 PT", "L6 CT"],
    +    library_id=lib_a,
    +    size=1.5,
    +    ncols=2
    +)
    +
    +
    +
    +

    +
    +
    +

    We can go ahead and visualize astrocytes and oligodendrocytes as well.

    -
    # this chunk has issues and therefore not evaluated
    -
    -sc.pl.spatial(
    -    st_adata, img_key="hires", color=["Oligo", "Astro"], size=1.5, library_id=lib_a
    -)
    +
    sc.pl.spatial(
    +    st_adata, img_key="hires", color=["Oligo", "Astro"], size=1.5, library_id=lib_a
    +)
    +
    +
    +
    +

    +
    +
    +

    Keep in mind that the deconvolution results are just predictions. Depending on how well your scRNAseq data covers the celltypes that are present in the ST data, and on how parameters, gene selection etc. are tuned, you may get different results.

    -
    # this chunk has issues and therefore not evaluated
    -
    -sc.pl.violin(st_adata, ["L2/3 IT", "L6 CT","Oligo","Astro"],
    -            jitter=0.4, groupby = 'clusters', rotation= 45)
    +
    sc.pl.violin(st_adata, ["L2/3 IT", "L6 CT","Oligo","Astro"],
    +            jitter=0.4, groupby = 'clusters', rotation= 45)
    +
    +
    +
    +

    +
    +
    +
    @@ -1902,23 +1730,28 @@

    Subset for another region that does not contain cortex cells and check what you get from the label transfer. A suggested region is the right end of the posterior section, which you can select like this:

    -
    # this chunk has issues and therefore not evaluated
    -
    -lib_p = "V1_Mouse_Brain_Sagittal_Posterior"
    -
    -adata_subregion = adata[
    -    (adata.obs.library_id == lib_p)
    -    & (adata.obsm["spatial"][:, 0] > 6500),
    -    :,
    -].copy()
    -
    -sc.pl.spatial(
    -    adata_subregion,
    -    img_key="hires",
    -    library_id=lib_p,
    -    color=['n_genes_by_counts'],
    -    size=1.5
    -)
    +
    lib_p = "V1_Mouse_Brain_Sagittal_Posterior"
    +
    +adata_subregion = adata[
    +    (adata.obs.library_id == lib_p)
    +    & (adata.obsm["spatial"][:, 0] > 6500),
    +    :,
    +].copy()
    +
    +sc.pl.spatial(
    +    adata_subregion,
    +    img_key="hires",
    +    library_id=lib_p,
    +    color=['n_genes_by_counts'],
    +    size=1.5
    +)
    +
    +
    +
    +

    +
    +
    +

    @@ -1931,34 +1764,47 @@

    -
    sc.logging.print_versions()
    +
    sc.logging.print_versions()
    -----
     anndata     0.10.3
     scanpy      1.9.6
     -----
     PIL                 10.0.0
    +absl                NA
    +annotated_types     0.6.0
     annoy               NA
     anyio               NA
     asttokens           NA
     attr                23.1.0
     babel               2.12.1
     backcall            0.2.0
    +backoff             2.2.1
    +bs4                 4.12.2
     certifi             2023.11.17
     cffi                1.15.1
     charset_normalizer  3.1.0
    +chex                0.1.83
    +click               8.1.7
     colorama            0.4.6
     comm                0.1.3
    +contextlib2         NA
    +croniter            NA
     cycler              0.12.1
     cython_runtime      NA
     dateutil            2.8.2
     debugpy             1.6.7
     decorator           5.1.1
    +deepdiff            6.7.1
     defusedxml          0.7.1
    +docrep              0.3.2
     exceptiongroup      1.2.0
     executing           1.2.0
    +fastapi             0.103.0
     fastjsonschema      NA
     fbpca               NA
    +flax                0.6.1
    +fsspec              2023.12.2
     gmpy2               2.1.2
     h5py                3.9.0
     idna                3.4
    @@ -1966,6 +1812,8 @@ 

    Published with Quarto v1.3.450

    - + diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-10-output-1.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-10-output-1.png index 50f6ac31..10a15d9f 100644 Binary files a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-10-output-1.png and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-10-output-1.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-10-output-2.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-10-output-2.png index 8f700e83..1438f98b 100644 Binary files a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-10-output-2.png and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-10-output-2.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-11-output-2.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-11-output-2.png index 5f426c5e..5e2d97da 100644 Binary files a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-11-output-2.png and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-11-output-2.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-14-output-1.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-14-output-1.png index f3966fd3..841a2088 100644 Binary files a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-14-output-1.png and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-14-output-1.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-14-output-2.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-14-output-2.png index caaa66f0..93c969a4 100644 Binary files a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-14-output-2.png and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-14-output-2.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-16-output-2.png 
b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-16-output-2.png index 43fb1ab8..582da3a5 100644 Binary files a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-16-output-2.png and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-16-output-2.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-17-output-1.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-17-output-1.png index 877d43d6..a80c06fc 100644 Binary files a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-17-output-1.png and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-17-output-1.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-20-output-2.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-20-output-2.png index 47837ca4..0f6a5917 100644 Binary files a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-20-output-2.png and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-20-output-2.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-21-output-1.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-21-output-1.png index a9b8ade9..a208f358 100644 Binary files a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-21-output-1.png and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-21-output-1.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-22-output-2.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-22-output-2.png index 2780f8dc..d0d6ffff 100644 Binary files a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-22-output-2.png and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-22-output-2.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-23-output-1.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-23-output-1.png index da271f1b..8528280f 100644 Binary files 
a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-23-output-1.png and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-23-output-1.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-23-output-2.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-23-output-2.png index 51e4c1ad..1b29c68c 100644 Binary files a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-23-output-2.png and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-23-output-2.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-31-output-2.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-32-output-2.png similarity index 100% rename from docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-31-output-2.png rename to docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-32-output-2.png diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-35-output-1.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-35-output-1.png deleted file mode 100644 index 30800955..00000000 Binary files a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-35-output-1.png and /dev/null differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-34-output-2.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-35-output-2.png similarity index 100% rename from docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-34-output-2.png rename to docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-35-output-2.png diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-36-output-1.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-36-output-1.png index 18c247d2..30800955 100644 Binary files a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-36-output-1.png and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-36-output-1.png differ diff --git 
a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-37-output-1.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-37-output-1.png new file mode 100644 index 00000000..6a01513a Binary files /dev/null and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-37-output-1.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-37-output-2.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-37-output-2.png deleted file mode 100644 index d49c470a..00000000 Binary files a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-37-output-2.png and /dev/null differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-38-output-2.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-38-output-2.png new file mode 100644 index 00000000..fa6c4a7f Binary files /dev/null and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-38-output-2.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-43-output-2.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-43-output-2.png new file mode 100644 index 00000000..6df85358 Binary files /dev/null and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-43-output-2.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-45-output-2.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-45-output-2.png new file mode 100644 index 00000000..01427a0c Binary files /dev/null and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-45-output-2.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-47-output-1.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-47-output-1.png new file mode 100644 index 00000000..6f2b0115 Binary files /dev/null and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-47-output-1.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-48-output-1.png 
b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-48-output-1.png new file mode 100644 index 00000000..717e0912 Binary files /dev/null and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-48-output-1.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-49-output-1.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-49-output-1.png new file mode 100644 index 00000000..0624918c Binary files /dev/null and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-49-output-1.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-50-output-1.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-50-output-1.png new file mode 100644 index 00000000..8eecc1eb Binary files /dev/null and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-50-output-1.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-7-output-1.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-7-output-1.png index a9ba1686..4efffc6e 100644 Binary files a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-7-output-1.png and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-7-output-1.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-8-output-1.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-8-output-1.png index 2487da72..7419b659 100644 Binary files a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-8-output-1.png and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-8-output-1.png differ diff --git a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-8-output-2.png b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-8-output-2.png index 6ec3b0dc..b9b177db 100644 Binary files a/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-8-output-2.png and b/docs/labs/scanpy/scanpy_08_spatial_files/figure-html/cell-8-output-2.png differ diff --git 
a/docs/labs/seurat/seurat_07_trajectory.html b/docs/labs/seurat/seurat_07_trajectory.html index 4f6c5785..1884d497 100644 --- a/docs/labs/seurat/seurat_07_trajectory.html +++ b/docs/labs/seurat/seurat_07_trajectory.html @@ -215,7 +215,7 @@

    Trajector
    Published
    -

    18-Jan-2024

    +

    23-Jan-2024

    @@ -242,8 +242,9 @@

    On this page

  • 6.2 Genes that change between two pseudotime points
  • 6.3 Genes that are different between lineages
  • -
  • 7 References
  • -
  • 8 Session info
  • +
  • 7 Generating batch-corrected data for differential gene expression
  • +
  • 8 References
  • +
  • 9 Session info
  • @@ -319,10 +320,11 @@

    The cell clustering information (for example: from Louvain, k-means)
  • A KNN/SNN graph (this is useful to inspect and sanity-check your trajectories)
  • +

    We already have pre-computed and subsetted the dataset (with 6688 cells and 3585 genes) following the analysis steps in this course. We then saved the objects, so you can use common tools to open and start to work with them (either in R or Python).

    +

    In addition there was some manual filtering done to remove clusters that are disconnected and cells that are hard to cluster, which can be seen in this script

    3 Reading data

    -

    We already have pre-computed and subsetted the dataset (with 6688 cells and 3585 genes) following the analysis steps in this course. We then saved the objects, so you can use common tools to open and start to work with them (either in R or Python).

    obj <- readRDS("data/trajectory/trajectory_seurat_filtered.rds")
     
    @@ -454,7 +456,11 @@ 

    p_State

    +<<<<<<< HEAD + +======= +>>>>>>> 0eb285391a39f912456fb8225629c193d72f7bc2
    @@ -874,13 +880,18 @@

    vignette for a more in-depth overview of tradeSeq and many other differential expression tests.

    -
    -

    7 References

    +
    +

    7 Generating batch-corrected data for differential gene expression

    +

    Before computing differential gene expression, sometimes it is a good idea to make sure our dataset is somewhat homogeneous (without very strong batch effects). In this dataset, we actually used data from 4 different technologies (Drop-seq, SmartSeq2 and 10X) and therefore massive differences in read counts can be observed:

    +

    If you want to know more about how to control for this issue, please have a look at batch_corrected_counts.Rmd

    +
    +
    +

    8 References

    Cannoodt, Robrecht, Wouter Saelens, and Yvan Saeys. 2016. “Computational Methods for Trajectory Inference from Single-Cell Transcriptomics.” European Journal of Immunology 46 (11): 2496–2506. doi.

    Saelens, Wouter, Robrecht Cannoodt, Helena Todorov, and Yvan Saeys. 2019. “A Comparison of Single-Cell Trajectory Inference Methods.” Nature Biotechnology 37 (5): 547–54. doi.

    -
    -

    8 Session info

    +
    +

    9 Session info

    Click here @@ -1242,7 +1253,11 @@

    Published with Quarto v1.3.450

    +<<<<<<< HEAD + +======= +>>>>>>> 0eb285391a39f912456fb8225629c193d72f7bc2 diff --git a/labs/scanpy/scanpy_07_trajectory.qmd b/labs/scanpy/scanpy_07_trajectory.qmd index 87bc0e30..c21623e1 100644 --- a/labs/scanpy/scanpy_07_trajectory.qmd +++ b/labs/scanpy/scanpy_07_trajectory.qmd @@ -10,7 +10,7 @@ engine: jupyter Code chunks run Python commands unless it starts with `%%bash`, in which case, those chunks run shell commands. ::: -Partly following [this tutorial](https://scanpy-tutorials.readthedocs.io/en/latest/paga-paul15.html). +Partly following this PAGA [tutorial](https://scanpy-tutorials.readthedocs.io/en/latest/paga-paul15.html) with some modifications. ## Loading libraries @@ -33,30 +33,33 @@ sc.settings.verbosity = 3 sc.settings.set_figure_params(dpi=100, frameon=False, figsize=(5, 5), facecolor='white', color_map = 'viridis_r') ``` -## Loading data +## Preparing data -In order to speed up the computations during the exercises, we will be using a subset of a bone marrow dataset (originally containing about -100K cells). The bone marrow is the source of adult immune cells, and contains virtually all differentiation stages of cell from the immune -system which later circulate in the blood to all other organs. +In order to speed up the computations during the exercises, we will be using a subset of a bone marrow dataset (originally containing about 100K cells). The bone marrow is the source of adult immune cells, and contains virtually all differentiation stages of cell from the immune system which later circulate in the blood to all other organs. ![](../figs/hematopoiesis.png) -All the data has been preprocessed with Seurat. The file trajectory_scanpy_filtered.h5ad was converted from the Seurat object -using the SeuratDisk package. For more information on how it was done, have a look at the script: convert_to_h5ad.R in the github repo. 
+If you have been using the **Seurat**, **Bioconductor** or **Scanpy** toolkits with your own data, you need to reach to the point where can find get: -```{python} -import os +- A dimensionality reduction where to perform the trajectory (for example: PCA, ICA, MNN, harmony, Diffusion Maps, UMAP) +- The cell clustering information (for example: from Louvain, k-means) +- A KNN/SNN graph (this is useful to inspect and sanity-check your trajectories) -path_data = "https://export.uppmax.uu.se/naiss2023-23-3/workshops/workshop-scrnaseq" -path_trajectory = "./data/trajectory" -if not os.path.exists(path_trajectory): - os.makedirs(path_trajectory, exist_ok=True) -``` + +In this case, all the data has been preprocessed with Seurat with standard pipelines. In addition there was some manual filtering done to remove clusters that are disconnected and cells that are hard to cluster, which can be seen in this [script](https://github.com/NBISweden/workshop-scRNAseq/blob/master/scripts/data_processing/slingshot_preprocessing.Rmd) + + +The file trajectory_scanpy_filtered.h5ad was converted from the Seurat object using the SeuratDisk package. For more information on how it was done, have a look at the script: [convert_to_h5ad.R](https://github.com/NBISweden/workshop-scRNAseq/blob/master/scripts/data_processing/convert_to_h5ad.R) in the github repo. + +You can download the data with the commands: ```{python} +import os import urllib.request +path_data = "https://export.uppmax.uu.se/naiss2023-23-3/workshops/workshop-scrnaseq" + path_results = "data/trajectory" if not os.path.exists(path_results): os.makedirs(path_results, exist_ok=True) @@ -68,7 +71,9 @@ if not os.path.exists(path_file): urllib.request.urlretrieve(file_url, path_file) ``` -Check that the variable names are correct. +## Reading data + +We already have pre-computed and subsetted the dataset (with 6688 cells and 3585 genes) following the analysis steps in this course. 
We then saved the objects, so you can use common tools to open and start to work with them (either in R or Python). ```{python} adata = sc.read_h5ad("data/trajectory/trajectory_seurat_filtered.h5ad") @@ -88,9 +93,7 @@ There is a umap and clusters provided with the object, first plot some informati sc.pl.umap(adata, color = ['clusters','dataset','batches','Phase'],legend_loc = 'on data', legend_fontsize = 'xx-small', ncols = 2) ``` -It is crucial that you performing analysis of a dataset understands what is going on, what are the clusters you see in your data and most -importantly How are the clusters related to each other?. Well, let’s explore the data a bit. With the help of this table, write down which -cluster numbers in your dataset express these key markers. +It is crucial that you performing analysis of a dataset understands what is going on, what are the clusters you see in your data and most importantly How are the clusters related to each other?. Well, let’s explore the data a bit. With the help of this table, write down which cluster numbers in your dataset express these key markers. |Marker |Cell Type| |--------|----------------------------| @@ -185,7 +188,7 @@ sc.pl.paga(adata, color='annot', edge_width_scale = 0.3) As you can see, we have edges between many clusters that we know are are unrelated, so we may need to clean up the data a bit more. -## Data pre-processing prior trajectory inference +## Filtering graph edges First, lets explore the graph a bit. So we plot the umap with the graph connections on top. 
@@ -200,7 +203,7 @@ sc.pp.neighbors(adata, n_neighbors=5, use_rep = 'X_harmony_Phase', n_pcs = 30) sc.pl.umap(adata, edges=True, color = 'annot', legend_loc= 'on data', legend_fontsize= 'xx-small') ``` -## Rerun PAGA again on the data +### Rerun PAGA again on the data ```{python} sc.tl.draw_graph(adata, init_pos='X_umap') @@ -212,9 +215,9 @@ sc.tl.paga(adata, groups='annot') sc.pl.paga(adata, color='annot', edge_width_scale = 0.3) ``` -## Recomputing the embedding using PAGA-initialization +## Embedding using PAGA-initialization -The following is just as well possible for a UMAP. +We can now redraw the graph using another starting position from the paga layout. The following is just as well possible for a UMAP. ```{python} sc.tl.draw_graph(adata, init_pos='paga') @@ -234,7 +237,9 @@ sc.pl.paga_compare( legend_fontsize=12, fontsize=12, frameon=False, edges=True) ``` -## Reconstructing gene changes along PAGA paths for a given set of genes +## Gene changes + +We can reconstruct gene changes along PAGA paths for a given set of genes. Choose a root cell for diffusion pseudotime. We have 3 progenitor clusters, but cluster 5 seems the most clear. @@ -250,8 +255,7 @@ Use the full raw data for visualization. sc.pl.draw_graph(adata, color=['annot', 'dpt_pseudotime'], legend_loc='on data', legend_fontsize= 'x-small') ``` -By looking at the different know lineages and the layout of the graph we define manually some paths to the graph that corresponds to spcific -lineages. +By looking at the different known lineages and the layout of the graph, we manually define some paths in the graph that correspond to specific lineages. ```{python} # Define paths @@ -303,11 +307,9 @@ pl.show() ``` :::{.callout-note title="Discuss"} -As you can see, we can manipulate the trajectory quite a bit by selecting different number of neighbors, components etc. to fit with our -assumptions on the development of these celltypes.
+As you can see, we can manipulate the trajectory quite a bit by selecting different number of neighbors, components etc. to fit with our assumptions on the development of these celltypes. -Please explore further how you can tweak the trajectory. For instance, can you create a PAGA trajectory using the orignial umap from Seurat -instead? Hint, you first need to compute the neighbors on the umap. +Please explore further how you can tweak the trajectory. For instance, can you create a PAGA trajectory using the original umap from Seurat instead? Hint, you first need to compute the neighbors on the umap. ::: ## Session info diff --git a/labs/scanpy/scanpy_08_spatial.qmd b/labs/scanpy/scanpy_08_spatial.qmd index 1fa565e1..0cf1aff7 100644 --- a/labs/scanpy/scanpy_08_spatial.qmd +++ b/labs/scanpy/scanpy_08_spatial.qmd @@ -35,7 +35,6 @@ import warnings warnings.simplefilter(action="ignore", category=Warning) -#sc.logging.print_versions() # gives errror!! sc.set_figure_params(facecolor="white", figsize=(8, 8)) sc.settings.verbosity = 3 ``` @@ -309,7 +308,7 @@ for i, library in enumerate( for k, v in clusters_colors.items() if k in ad.obs.clusters.unique().tolist() ], - legend_loc="on data", + legend_loc=None, show=False, ax=axs[i], ) @@ -369,6 +368,10 @@ with open('data/spatial/visium/scanpy_spatialde.pkl', 'wb') as file: ``` ```{python} +# | results: hide +# | eval: false +# skip for now + import urllib.request import os @@ -382,12 +385,19 @@ if not os.path.exists(path_file): ``` ```{python} +# | results: hide +# | eval: false +# skip for now + import pickle with open('data/spatial/visium/scanpy_spatialde.pkl', 'rb') as file: results = pickle.load(file) ``` ```{python} +# | results: hide +# | eval: false +# skip for now. # We concatenate the results with the DataFrame of annotations of variables: `adata.var`.
results.index = results["g"] @@ -423,10 +433,21 @@ adata_cortex = sc.read_h5ad("data/spatial/visium/allen_cortex.h5ad") adata_cortex ``` +Here is the metadata for the cell annotation: + ```{python} adata_cortex.obs ``` +There is an issue with the raw matrix in this object: the gene names are not in the index, so we will put them back in. + +```{python} +adata_cortex.raw.var.index = adata_cortex.raw.var._index +adata_cortex.raw.var +``` + +Then we run the regular pipeline with normalization and dimensionality reduction. + ```{python} sc.pp.normalize_total(adata_cortex, target_sum=1e5) sc.pp.log1p(adata_cortex) @@ -485,7 +506,7 @@ adata_anterior_subset = counts_adata[ ].copy() # select also the cortex clusters -adata_anterior_subset = adata_anterior_subset[adata_anterior_subset.obs.clusters.isin(['3','4','6','7']),:] +adata_anterior_subset = adata_anterior_subset[adata_anterior_subset.obs.clusters.isin(['3','5','6']),:] # plot to check that we have the correct regions @@ -509,7 +530,7 @@ Here, we will use deconvolution with Stereoscope implemented in the SCVI-tools p {{< meta st_deconv_genes_1 >}} ```{python} -sc.tl.rank_genes_groups(adata_cortex, 'subclass', method = "t-test", n_genes=100) +sc.tl.rank_genes_groups(adata_cortex, 'subclass', method = "t-test", n_genes=100, use_raw=False) sc.pl.rank_genes_groups_dotplot(adata_cortex, n_genes=3) ``` @@ -529,37 +550,23 @@ deg = np.intersect1d(deg,adata_anterior_subset.var.index).tolist() print(len(deg)) ``` -Train the model +### Train the model First, train the model using scRNAseq data. Stereoscope requires the data to be in counts, earlier in this tutorial we saved the spatial counts in a separate object counts_adata. -However, the single cell dataset that we dowloaded only has the lognormalized data in the adata.X slot, hence we will have to recalculate the count matrix. +In the single cell data we have the raw counts in the `raw.X` matrix so that one will be used.
So here we create a new object with all the correct slots for scVI. -```{python} -# first do exponent and subtract pseudocount -E = np.exp(adata_cortex.X)-1 -n = np.sum(E,1) -print(np.min(n), np.max(n)) -# all sums to 1.7M -factor = np.mean(n) -nC = np.array(adata_cortex.obs.nCount_RNA) # true number of counts -scaleF = nC/factor -C = E * scaleF[:,None] -C = C.astype("int") -``` ```{python} sc_adata = adata_cortex.copy() -sc_adata.X = C +sc_adata.X = adata_cortex.raw.X.copy() ``` Setup the anndata, the implementation requires the counts matrix to be in the "counts" layer as a copy. ```{python} -# | eval: false -# this chunk has issues and therefore not evaluated import scvi # from scvi.data import register_tensor_from_anndata @@ -576,8 +583,6 @@ RNAStereoscope.setup_anndata(sc_adata, layer="counts", labels_key="subclass") ``` ```{python} -# | eval: false -# this chunk has issues and therefore not evaluated # the model is saved to a file, so if is slow to run, you can simply reload it from disk by setting train = False @@ -592,13 +597,11 @@ else: print("Loaded RNA model from file!") ``` -Predict propritions on the spatial data +### Predict proportions on the spatial data First create a new st object with the correct genes and counts as a layer. ```{python} -# | eval: false -# this chunk has issues and therefore not evaluated st_adata = adata_anterior_subset.copy() @@ -609,8 +612,6 @@ SpatialStereoscope.setup_anndata(st_adata, layer="counts") ``` ```{python} -#| eval: false -# this chunk has issues and therefore not evaluated train=True if train: @@ -626,8 +627,6 @@ else: Get the results from the model, also put them in the .obs slot. ```{python} -# | eval: false -# this chunk has issues and therefore not evaluated st_adata.obsm["deconvolution"] = spatial_model.get_proportions() @@ -639,8 +638,6 @@ for ct in st_adata.obsm["deconvolution"].columns: We are then able to explore how cell types in the scRNA-seq dataset are predicted onto the visium dataset. 
Let's first visualize the neurons cortical layers. ```{python} -# | eval: false -# this chunk has issues and therefore not evaluated sc.pl.spatial( st_adata, @@ -655,8 +652,6 @@ sc.pl.spatial( We can go ahead an visualize astrocytes and oligodendrocytes as well. ```{python} -# | eval: false -# this chunk has issues and therefore not evaluated sc.pl.spatial( st_adata, img_key="hires", color=["Oligo", "Astro"], size=1.5, library_id=lib_a @@ -666,8 +661,6 @@ sc.pl.spatial( {{< meta st_2 >}} ```{python} -# | eval: false -# this chunk has issues and therefore not evaluated sc.pl.violin(st_adata, ["L2/3 IT", "L6 CT","Oligo","Astro"], jitter=0.4, groupby = 'clusters', rotation= 45) @@ -677,8 +670,6 @@ sc.pl.violin(st_adata, ["L2/3 IT", "L6 CT","Oligo","Astro"], {{< meta st_3 >}} ```{python} -# | eval: false -# this chunk has issues and therefore not evaluated lib_p = "V1_Mouse_Brain_Sagittal_Posterior" diff --git a/labs/seurat/seurat_07_trajectory.qmd b/labs/seurat/seurat_07_trajectory.qmd index c7cef26b..4a022dfa 100644 --- a/labs/seurat/seurat_07_trajectory.qmd +++ b/labs/seurat/seurat_07_trajectory.qmd @@ -63,9 +63,14 @@ If you have been using the **Seurat**, **Bioconductor** or **Scanpy** toolkits w - The cell clustering information (for example: from Louvain, k-means) - A KNN/SNN graph (this is useful to inspect and sanity-check your trajectories) +We already have pre-computed and subsetted the dataset (with 6688 cells and 3585 genes) following the analysis steps in this course. We then saved the objects, so you can use common tools to open and start to work with them (either in R or Python). 
+ +In addition there was some manual filtering done to remove clusters that are disconnected and cells that are hard to cluster, which can be seen in this [script](https://github.com/NBISweden/workshop-scRNAseq/blob/master/scripts/data_processing/slingshot_preprocessing.Rmd) + + ## Reading data -We already have pre-computed and subsetted the dataset (with 6688 cells and 3585 genes) following the analysis steps in this course. We then saved the objects, so you can use common tools to open and start to work with them (either in R or Python). + ```{r } obj <- readRDS("data/trajectory/trajectory_seurat_filtered.rds") @@ -465,98 +470,13 @@ for (i in vars) { Check out this [vignette](https://statomics.github.io/tradeSeq/articles/tradeSeq.html) for a more in-depth overview of tradeSeq and many other differential expression tests. -::: {.content-hidden} ## Generating batch-corrected data for differential gene expression Before computing differential gene expression, sometimes it is a good idea to make sure our dataset is somewhat homogeneous (without very strong batch effects). In this dataset, we actually used data from 4 different technologies (Drop-seq, SmartSeq2 and 10X) and therefore massive differences in read counts can be observed: -```{r} -#| eval: false - -# SEURAT -VlnPlot(obj, features = "nUMI", group.by = "batches") - -# BIOCONDUCTOR -# plotColData(sce,y = "nUMI",x = "batches",colour_by = "batches") -``` - -Since we are not interested in the effects of the batches in this example, but only the differentiation paths for each cell type. We can use the integrated space of harmony embedding (where we removed batch effects). Since the harmony (same applies to MNN, SCANORAMA, LIGER ) is a corrected version of PCA, we can multiply the harmony embedding with PCA loadings to generate batch-corrected "pseudo counts". Note that we can only reconstruct data from the highly variable genes that were used to compute PCA and HARMONY. 
- -```{r} -#| eval: false - -# Get the gene means and standard deviation -library(sparseMatrixStats) -genes <- rownames(PCA_loadings) -gene_means <- rowMeans2(filt_NORM_COUNTS[genes, ]) -gene_sd <- sqrt(rowVars(filt_NORM_COUNTS[genes, ])) - -# Project normalized gene counts -CORRECTED_NORMCOUNTS <- t(filt_HARMONY %*% t(PCA_loadings)) * gene_sd + gene_means - 0.02 -CORRECTED_NORMCOUNTS <- Matrix(round(CORRECTED_NORMCOUNTS, 3), sparse = T) -CORRECTED_NORMCOUNTS@x[CORRECTED_NORMCOUNTS@x < 0] <- 0 -CORRECTED_NORMCOUNTS <- drop0(CORRECTED_NORMCOUNTS) - -# Transform the normalized data back to raw counts (used for differential expression) -CORRECTED_COUNTS <- round((expm1(CORRECTED_NORMCOUNTS)) * 1000) -``` - -Let's compare how the normalized data compares to the batch-corrected one. - -```{r} -#| eval: false - -par(mfrow = c(3, 3)) -{ - plot(obj@reductions$umap@cell.embeddings, type = "n") - draw_graph(layout = obj@reductions$umap@cell.embeddings, graph = filt_KNN) - points(obj@reductions$umap@cell.embeddings, col = pal[filt_clustering], pch = 16) - text(centroids2d[, 1], centroids2d[, 2], - labels = rownames(centroids2d), cex = 0.8, font = 2 - ) -} - -vars <- c("Cd34", "Ms4a1", "Cd3e", "Ltf", "Cst3", "Mcpt8", "Alas2", "Siglech") -for (i in vars) { - plot(filt_NORM_COUNTS[i, ], CORRECTED_NORMCOUNTS[i, ], main = i, pch = 16, cex = 0.4) - rr <- c(diff(range(filt_NORM_COUNTS[i, ])) / 50, (range(CORRECTED_NORMCOUNTS[i, ]))) - polygon(c(-rr[1], -rr[1], rr[1], rr[1]), c(rr[3], rr[2], rr[2], rr[3]), border = "red") - text(rr[1], max(CORRECTED_NORMCOUNTS[i, ]), " < Imputed\n counts", adj = c(0, 1), col = "red", font = 2) -} -``` - -:::{.callout-caution} -Please note in the graphs above that there is a significant amount of imputation (i.e., we artificially add counts to certain cells where we'd expect to see). Please keep this in mind and use these matrices with caution in downstream analysis! 
-::: - -Let's also take a closer inspection on the UMAPs: - -```{r} -#| eval: false - -par(mfrow = c(4, 5), mar = c(.1, .1, 2, 1)) - -vars <- c("Cd34", "Ms4a1", "Cd3e", "Ltf", "Cst3", "Mcpt8", "Alas2", "Siglech", "C1qc") -for (j in c("filt_NORM_COUNTS", "CORRECTED_NORMCOUNTS")) {{ - plot(obj@reductions$umap@cell.embeddings, type = "n", axes = F, xlab = "", ylab = "", main = j) - draw_graph(layout = obj@reductions$umap@cell.embeddings, graph = filt_KNN) - points(obj@reductions$umap@cell.embeddings, col = pal[obj$clusters_use], pch = 16) - text(centroids2d, labels = rownames(centroids2d), cex = 0.8, font = 2) -} - -for (i in vars) { - x <- get(j)[i, ] - x <- x - min(x) / (max(x) - min(x)) - o <- order(x) - plot(obj@reductions$umap@cell.embeddings[o, ], - main = paste0(i), pch = 16, cex = 0.4, axes = F, xlab = "", ylab = "", - col = colorRampPalette(c("lightgray", "blue"))(99)[x[o] * 98 + 1] - ) -}} -``` +If you want to know more about how to control for this issue, please have a look at [batch_corrected_counts.Rmd](https://github.com/NBISweden/workshop-scRNAseq/blob/master/scripts/data_processing/batch_corrected_counts.Rmd) -::: ## References diff --git a/scripts/data_processing/batch_corrected_counts.Rmd b/scripts/data_processing/batch_corrected_counts.Rmd new file mode 100644 index 00000000..e573f650 --- /dev/null +++ b/scripts/data_processing/batch_corrected_counts.Rmd @@ -0,0 +1,88 @@ + + + + +```{r} +#| eval: false + +# SEURAT +VlnPlot(obj, features = "nUMI", group.by = "batches") + +# BIOCONDUCTOR +# plotColData(sce,y = "nUMI",x = "batches",colour_by = "batches") +``` + +Since we are not interested in the effects of the batches in this example, but only the differentiation paths for each cell type. We can use the integrated space of harmony embedding (where we removed batch effects). 
Since the harmony (same applies to MNN, SCANORAMA, LIGER ) is a corrected version of PCA, we can multiply the harmony embedding with PCA loadings to generate batch-corrected "pseudo counts". Note that we can only reconstruct data from the highly variable genes that were used to compute PCA and HARMONY. + +```{r} +#| eval: false + +# Get the gene means and standard deviation +library(sparseMatrixStats) +genes <- rownames(PCA_loadings) +gene_means <- rowMeans2(filt_NORM_COUNTS[genes, ]) +gene_sd <- sqrt(rowVars(filt_NORM_COUNTS[genes, ])) + +# Project normalized gene counts +CORRECTED_NORMCOUNTS <- t(filt_HARMONY %*% t(PCA_loadings)) * gene_sd + gene_means - 0.02 +CORRECTED_NORMCOUNTS <- Matrix(round(CORRECTED_NORMCOUNTS, 3), sparse = T) +CORRECTED_NORMCOUNTS@x[CORRECTED_NORMCOUNTS@x < 0] <- 0 +CORRECTED_NORMCOUNTS <- drop0(CORRECTED_NORMCOUNTS) + +# Transform the normalized data back to raw counts (used for differential expression) +CORRECTED_COUNTS <- round((expm1(CORRECTED_NORMCOUNTS)) * 1000) +``` + +Let's compare how the normalized data compares to the batch-corrected one. 
+ +```{r} +#| eval: false + +par(mfrow = c(3, 3)) +{ + plot(obj@reductions$umap@cell.embeddings, type = "n") + draw_graph(layout = obj@reductions$umap@cell.embeddings, graph = filt_KNN) + points(obj@reductions$umap@cell.embeddings, col = pal[filt_clustering], pch = 16) + text(centroids2d[, 1], centroids2d[, 2], + labels = rownames(centroids2d), cex = 0.8, font = 2 + ) +} + +vars <- c("Cd34", "Ms4a1", "Cd3e", "Ltf", "Cst3", "Mcpt8", "Alas2", "Siglech") +for (i in vars) { + plot(filt_NORM_COUNTS[i, ], CORRECTED_NORMCOUNTS[i, ], main = i, pch = 16, cex = 0.4) + rr <- c(diff(range(filt_NORM_COUNTS[i, ])) / 50, (range(CORRECTED_NORMCOUNTS[i, ]))) + polygon(c(-rr[1], -rr[1], rr[1], rr[1]), c(rr[3], rr[2], rr[2], rr[3]), border = "red") + text(rr[1], max(CORRECTED_NORMCOUNTS[i, ]), " < Imputed\n counts", adj = c(0, 1), col = "red", font = 2) +} +``` + +:::{.callout-caution} +Please note in the graphs above that there is a significant amount of imputation (i.e., we artificially add counts to certain cells where we'd expect to see). Please keep this in mind and use these matrices with caution in downstream analysis! 
+::: + +Let's also take a closer inspection on the UMAPs: + +```{r} +#| eval: false + +par(mfrow = c(4, 5), mar = c(.1, .1, 2, 1)) + +vars <- c("Cd34", "Ms4a1", "Cd3e", "Ltf", "Cst3", "Mcpt8", "Alas2", "Siglech", "C1qc") +for (j in c("filt_NORM_COUNTS", "CORRECTED_NORMCOUNTS")) {{ + plot(obj@reductions$umap@cell.embeddings, type = "n", axes = F, xlab = "", ylab = "", main = j) + draw_graph(layout = obj@reductions$umap@cell.embeddings, graph = filt_KNN) + points(obj@reductions$umap@cell.embeddings, col = pal[obj$clusters_use], pch = 16) + text(centroids2d, labels = rownames(centroids2d), cex = 0.8, font = 2) +} + +for (i in vars) { + x <- get(j)[i, ] + x <- x - min(x) / (max(x) - min(x)) + o <- order(x) + plot(obj@reductions$umap@cell.embeddings[o, ], + main = paste0(i), pch = 16, cex = 0.4, axes = F, xlab = "", ylab = "", + col = colorRampPalette(c("lightgray", "blue"))(99)[x[o] * 98 + 1] + ) +}} +``` diff --git a/scripts/data_processing/convert_to_h5ad.R b/scripts/data_processing/convert_to_h5ad.R new file mode 100644 index 00000000..7f5e03b9 --- /dev/null +++ b/scripts/data_processing/convert_to_h5ad.R @@ -0,0 +1,19 @@ +# convert Seurat data to h5ad in R + +library(Seurat) +library(SeuratDisk) + +tdata = readRDS("./../data/bone_marrow/trajectory_seurat_filtered.rds") + +#OBS! Scanpy will consider columns with numbers as factors as numerical, need to convert them to strings. For instance the cluster numbers. 
+tdata$clusters = as.character(tdata$clusters) +tdata$metadata_clusters = as.character(tdata$metadata_clusters) + + +tmpfile = "./../data/bone_marrow/trajectory_scanpy_filtered.h5Seurat" + +SaveH5Seurat(tdata, filename = tmpfile) +Convert(tmpfile, dest = "h5ad", overwrite = T) + +file.remove(tmpfile) + diff --git a/scripts/data_processing/slingshot_preprocessing.Rmd b/scripts/data_processing/slingshot_preprocessing.Rmd new file mode 100644 index 00000000..05aa1052 --- /dev/null +++ b/scripts/data_processing/slingshot_preprocessing.Rmd @@ -0,0 +1,399 @@ +--- +author: "Åsa Björklund & Paulo Czarnewski" +date: '`r format(Sys.Date(), "%B %d, %Y")`' +output: + html_document: + self_contained: true + highlight: tango + df_print: paged + toc: yes + toc_float: + collapsed: false + smooth_scroll: true + toc_depth: 3 + keep_md: yes + fig_caption: true + html_notebook: + self_contained: true + highlight: tango + df_print: paged + toc: yes + toc_float: + collapsed: false + smooth_scroll: true + toc_depth: 3 +editor_options: + chunk_output_type: console +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(message=FALSE, warning=FALSE, result='hold',fig.width=12, fig.height=12,tidy=TRUE) +``` + +# Trajectory inference analysis: Slingshot + + +### Loading libraries + +```{r} +suppressPackageStartupMessages({ + library(Seurat) + library(rafalib) + library(cowplot) + library(plotly) + options(rgl.printRglwidget = TRUE) + library(Matrix) + library(sparseMatrixStats) + library(slingshot) +}) + +#Define some color palette +pal <- c(scales::hue_pal()(8),RColorBrewer::brewer.pal(9,"Set1"),RColorBrewer::brewer.pal(8,"Set2") ) +set.seed(1); pal <- rep( sample( pal , length(pal) ) , 200 ) +``` + +Nice function to easily draw a graph: + +```{r} +#Add graph to the base R graphics plot +draw_graph <- function( layout , graph , lwd = 0.2 , col = "grey" ){ + res <- rep(x = 1:(length(graph@p)-1) , times = (graph@p[-1] - graph@p[-length(graph@p)]) ) + segments(x0 = layout[graph@i+1,1], 
x1=layout[res,1], + y0 = layout[graph@i+1,2], y1=layout[res,2], lwd=lwd , col=col )} +``` + +### Loading data + +In order to speed up the computations during the exercises, we will be using a subset of a bone marrow dataset (originally containing about 100K cells). The bone marrow is the source of adult immune cells, and contains virtually all differentiation stages of cell from the **immune** system which later circulate in the blood to all other organs. + +![](Hematopoiesis.png) + +You can download the files we prepared with these commands: + +```{r} +webpath <- "https://github.com/NBISweden/workshop-scRNAseq/blob/master/labs/data/bone_marrow/" +file_list <- c("trajectory_seurat.rds", + "trajectory_scran.rds") +for(i in file_list){ download.file( url = paste0(webpath,i,"?raw=true") , destfile = paste0(i))} +``` + +If you have been using the `scran/scater` , `Seurat` or `Scanpy` pipelines with your own data, you need to reach to the point where can find get: + +* A dimensionality reduction where to perform the trajectory (for example: PCA, ICA, MNN, harmony, Diffusion Maps, UMAP) +* The cell clustering information (for example: from Louvain, k-means) +* A KNN/SNN graph (this is useful to inspect and sanity-check your trajectories) + + +### Loading the dataset +*** + +We already have pre-computed and subseted the dataset (with 6688 cells and 3585 genes) following the analysis steps in this course. We then saved the objects, so you can use common tools to open and start to work with them (either in R or Python). 
+ +```{r } +obj <- readRDS("trajectory_seurat.rds") +``` + +Lets visualize which clusters we have in our dataset: + +```{r} +vars <- c("batches","dataset","clusters","Phase") ; pl <- list() + +for(i in vars){ pl[[i]] <- DimPlot( obj , group.by = i ,label = T) + theme_void() + NoLegend() } +plot_grid(plotlist = pl) +``` + +You can check, for example how many cells are in each cluster: + +```{r} +table(obj$clusters) +``` + + +### Exploring the data +*** + +It is crucial that you performing analysis of a dataset understands what is going on, what are the clusters you see in your data and most importantly **How are the clusters related to each other?**. Well, let's explore the data a bit. With the help of this table, write down which cluster numbers in your dataset express these key markers. + +| Marker | Cell Type | +|---------|-------------------------| +| Cd34 | HSC progenitor | +| Ms4a1 | B cell lineage | +| Cd3e | T cell lineage | +| Ltf | Granulocyte lineage | +| Cst3 | Monocyte lineage | +| Mcpt8 | Mast Cell lineage | +| Alas2 | RBC lineage | +| Siglech | Dendritic cell lineage | +| C1qc | Macrophage cell lineage | +| Pf4 | Megakaryocyte cell lineage | + + +```{r} +vars <- c("Cd34","Ms4a1","Cd3e","Ltf","Cst3","Mcpt8","Alas2","Siglech","C1qc","Pf4"); pl <- list() + +pl <- list( DimPlot( obj , group.by = "clusters" , label = T) + theme_void() + NoLegend() ) +for(i in vars){ pl[[i]] <- FeaturePlot( obj , features = i , order = T) + theme_void() + NoLegend() } +plot_grid(plotlist = pl) +``` + +Another way to better explore your data is look in higher dimensions, to really get a sense for what is right or wrong. As mentioned in the dimensionality reduction exercises, here we ran UMAP with **3** dimensions (**IMPORTANT: the UMAP needs to be computed to results in _exactly_ 3 dimensions**). 
+ +Since the steps below are identical to both `Seurat` and `Scran` pipelines, we will extract the matrices from both, so it is clear what is being used where and to remove long lines of code used to get those matrices. We will use them all. + +```{r} +#SEURAT +NORM_COUNTS <- obj@assays$RNA@data +UMAP2 <- obj@reductions$umap@cell.embeddings +UMAP3 <- obj@reductions$umap3d@cell.embeddings +HARMONY <- obj@reductions$harmony_Phase@cell.embeddings +PCA <- obj@reductions$pca@cell.embeddings +PCA_loadings <- obj@reductions$pca@feature.loadings +clustering <- factor(obj$clusters) +KNN <- obj@graphs$knn + +# Calculate cluster centroids (for plotting the labels later) +mm <- sparse.model.matrix( ~ 0 + factor( clustering ) ) ; colnames(mm) <- levels( factor( clustering ) ) +centroids3d <- as.matrix( t( t(UMAP3) %*% mm ) / Matrix::colSums(mm) ) +centroids2d <- as.matrix( t( t(UMAP2) %*% mm ) / Matrix::colSums(mm) ) +``` + +Plot in 3D with `Plotly`: + +```{r} +df <- data.frame( UMAP3 , variable=clustering ) +colnames(df)[1:3] <- c("UMAP_1","UMAP_2","UMAP_3") +p_State <- plot_ly(df,x = ~UMAP_1, y = ~UMAP_2, z=~UMAP_3,color = ~variable, colors = pal, size=.5) +try(htmlwidgets::saveWidget(p_State, + selfcontained = T,"umap_3d_clustering_plotly.html"),silent = T) +browseURL("umap_3d_clustering_plotly.html") +p_State +``` + + +### Data pre-processing prior trajectory inference + +Before we take a dive into trajectory itself, we need to clean up the data a bit, so we can get nice figures later. This dataset is already clean in terms of quality of the cells and so on, but as you probably noticed in the clustering exercise, there are always some outlier cells in the "wrong" part of the plot (as in, far away from cells of the same cluster). + +In reality, this is distortion caused by the graph layout algorithm (UMAP in this case) that tries to **force the data to be shown in 2 dimensions**.
If you check the 3D plots, the data points are in the "correct" position closer to other points in the same clusters (there is not a single misplaced point). Still, in most cases not even 3 dimensions is enough to represent the data correctly. + +In most cases this doesn't affect any results, but it affects the trajectories. As there are many ways to handle this, but the simplest is just change of the outlier cells to be closer its "cluster-mates". + +```{r} +# calculate the distance from every cell to its neighbors +expected_U2d <- t( t(UMAP2) %*% KNN ) / colSums2(KNN) +d <- rowSums( (expected_U2d - UMAP2)^2 ) ^ (1/2) +cutoff <- mean(d) + 5 * sd(d) +to_keep <- ( d < cutoff ) + +new_UMAP2 <- UMAP2 +res <- as.matrix( t( t(UMAP2) %*% KNN ) / colSums2(KNN) ) +new_UMAP2[ !to_keep , ] <- res[ !to_keep , ] + +new_centroids2d <- as.matrix( t( t(new_UMAP2) %*% mm ) / Matrix::colSums(mm) ) +``` + +And let's plot the UMAP again: + +```{r} +# Check the UMAP in 2D +mypar() +plot( new_UMAP2 , type="n") +draw_graph( layout = new_UMAP2 , graph = KNN ) +points( new_UMAP2 , cex = ifelse(!to_keep,1,0.3) , + lwd = ifelse(!to_keep,2,0) , + bg = pal[ clustering ], pch=21 ) +text(new_centroids2d,labels = rownames(new_centroids2d),cex=1,font=2) +``` + +Much better! + + +#### Trajectory inference with Slingshot +*** + +Until up to this point, the steps above have been somewhat covered in the previous lectures. From now on, we will start using that clustering and data reduction techniques for trajectory inference. + +#### Which dimensionality reduction to use? + +Here, the choice upon which dimension to use have a great impact on your results. Ideally, we should use other multidimensional representations (more than 2D) for the calculations and only later then *visualize* in your UMAP 2D. For example, we could run even some other additional dimensionality reduction methods (`ICA` and `DiffusionMaps`) on top of the integrated harmony embedding, where the batch effects were corrected. 
+ +#### Defining cell lineages with Slingshot + +Let's run default `Slingshot` lineage identification on the **2D UMAP** to get a first impression about it (for the sake of simplicity), but you are welcome to explore **different embedding** and use **more dimensions**. The whole process can be done using a single function named `slingshot`, which is simply a wrapper for the 2 main steps for trajectory inference. The first step of the process is to define the lineages and then fit a curve through the data that defines a trajectory. These steps are broken down below for clarity. + + +```{r, fig.height=6} +# Run Slingshot +set.seed(1) +lineages <- as.SlingshotDataSet(getLineages( data = new_UMAP2 , clusterLabels = clustering )) + +# Change the reduction (FOR VISUALISATION ONLY, in case you use another dimension for calculations) +lineages +lineages@reducedDim <- new_UMAP2 + +#Plot the lineages +mypar(1,2) ; plot(new_UMAP2, col = pal[clustering], cex=.5,pch = 16) +lines(lineages, lwd = 2, col = 'black', cex=3 ) +text(new_centroids2d,labels = rownames(new_centroids2d),cex=0.8,font=2,col = "white") + +# Check the UMAP in 2D +plot( new_UMAP2 , type="n") +draw_graph( layout = new_UMAP2 , graph = KNN ) +points( new_UMAP2 , cex = ifelse(!to_keep,1,0.3) , + lwd = ifelse(!to_keep,2,0) , + bg = pal[ clustering ], pch=21 ) +text(new_centroids2d,labels = rownames(new_centroids2d),cex=0.8,font=2) +``` + +Now take a closer look at the object: which lineages do you see? Which is the starting cell cluster? + +```{r} +print(lineages) +``` + +Here we see one central issue with trajectory analysis: where does the trajectory begin? Without any extra information, this is nearly an **impossible** task for any T.I. method. We need **prior biological** information to be able to define where the trajectory starts and where it should end. + +First, we need to make sure to identify which cluster is the progenitor cell. In this case, they express the marker CD34.
+ +| Marker | Cell Type | +|---------|-------------------------| +| Cd34 | HSC progenitor | +| Ms4a1 | B cell lineage | +| Cd3e | T cell lineage | +| Ltf | Granulocyte lineage | +| Cst3 | Monocyte lineage | +| Mcpt8 | Mast Cell lineage | +| Alas2 | RBC lineage | +| Siglech | Dendritic cell lineage | + + +```{r, fig.height=6} +#SEURAT +plot_grid(plotlist = list( + DimPlot( obj, group.by = "clusters",label = T) + theme_void() + NoLegend() , + FeaturePlot(obj,features = "Cd34",order = T) + theme_void() + NoLegend())) +``` + +Then, we can insert that information on **where the trajectory** starts on the `getLineages` function. + +```{r, fig.width=6, fig.height=6} +# Run Slingshot +set.seed(1) +lineages <- as.SlingshotDataSet(getLineages(data = new_UMAP2, + clusterLabels = clustering, + #end.clus = c("4","3","13","9"), # You can also define the ENDS! + start.clus = "57")) # define where to START the trajectories + +# Change the reduction (FOR VISUALISATION ONLY, in case you use another dimension for calculations) +lineages@reducedDim <- new_UMAP2 + + +#Plot the lineages +mypar() ; plot(new_UMAP2, col = pal[clustering], cex=.5,pch = 16) +lines(lineages, lwd = 2, col = 'black', cex=3 ) +text(new_centroids2d,labels = rownames(new_centroids2d),cex=0.8,font=2,col = "white") +``` + +If you compare this plot with the previous, you will not notice many differences, but let's check + +```{r} +print(lineages) +``` + +What changed? + + +#### The issue with unconnected clusters + +As you have probably noticed, there are many other small clusters in the dataset that do not express those markers. Those are other less abundant cell types, which the **intermediate cell states were NOT present** (because there were not enough cells captured)! Slingshot tries to fit a **model / MST** (minimum spanning tree) on the whole data, so for this example, we can look at the KNN graph to help us filter out some cells that can't estimate trajectories with confidence. 
+ +Here, we will also remove some other small clusters just for the sake of simplicity on the steps below. In practice, sometimes it is easier to work with parts of the dataset at a time, in order to explore trajectories in more details. + +```{r, fig.width=6, fig.height=6} +mypar() ; plot(new_UMAP2, col = pal[clustering], cex=.5,pch = 16) +draw_graph( layout = new_UMAP2 , graph = KNN ) +text(new_centroids2d,labels = rownames(new_centroids2d),cex=1,font=2) + + +# Define clusters to filter +sort( table(clustering) ) +clusters_to_remove <- c("71","65","73","66","56","74","42","69","62","40","30","68","70", +"64","67","51","24","63","48","3","10","4","31","72","39") +cell_to_keep <- !( clustering %in% clusters_to_remove ) + +# Filtering clusters +filt_new_UMAP2 <- new_UMAP2[ cell_to_keep, ] +filt_UMAP3 <- UMAP3[ cell_to_keep ,] +filt_NORM_COUNTS <- NORM_COUNTS[ ,cell_to_keep ] +filt_PCA <- PCA[ cell_to_keep, ] +filt_HARMONY <- HARMONY[ cell_to_keep, ] +filt_KNN <- KNN[ cell_to_keep , cell_to_keep] +filt_clustering <- factor( clustering [ cell_to_keep ] ) +filt_new_centroids2d <- as.matrix(new_centroids2d[ !( rownames(new_centroids2d) %in% clusters_to_remove ) , ]) +filtcentroids3d <- as.matrix(centroids3d[ !( rownames(centroids3d) %in% clusters_to_remove ) , ]) + + + +# Plot +mypar() ; plot(filt_new_UMAP2,type="n") ; draw_graph( layout = filt_new_UMAP2 , graph = filt_KNN ) +points(filt_new_UMAP2, col = pal[filt_clustering], cex=.5,pch = 16) +text(filt_new_centroids2d[,1:2],labels = rownames(filt_new_centroids2d),cex=1,font=1) +``` + +#### The issue with very similar undistinguishible clusters + +Another issue is related to the clustering resolution used. For trajectories, it is better to use fine-grain resolution to separate intermediate cell types. Again, there are many approaches to that, and here we will use the KNN graph to compute the amount of connections between clusters. If the amount of connections is above a certain threshold, we then merge those clusters. 
**IMPORTANT: The code below is very useful in many other single cell data analysis occasions!** + +```{r, fig.width=6, fig.height=6} +# Compute connections between clusters on a graph: KNN entries summed per cluster pair, divided by the square root of the product of the two filt_mm column sums +filt_mm <- sparse.model.matrix( ~ 0 + factor( filt_clustering ) ) +colnames(filt_mm) <- levels( factor( filt_clustering ) ) +d <- t( filt_KNN %*% filt_mm ) %*% filt_mm / ( t( t( colSums(filt_mm) ) ) %*% t(colSums(filt_mm)) )^(1/2) +diag(d) <- 0 ; d <- drop0(d) +pheatmap::pheatmap(d,clustering_method = "ward.D2") + + + +# Merging similar clusters: cutoff = 1.2 * sqrt( sum(d@x^2) / (length(d@x)-1) ); pairs above it are flagged in the lower triangle of to_merge, and rows left without any flag get a 1 on the diagonal +hist( d@x , breaks = 50) ; cutoff <- 1.2*(sum( (d@x^2) )/(length(d@x)-1) )^(1/2) +abline(v=(cutoff),col="red",xpd=F) ; cutoff +to_merge <- drop0( ( d > cutoff )*1 ) +to_merge <- to_merge * lower.tri(to_merge) +diag(to_merge)[ rowSums(to_merge) == 0 ] <- 1 +# Plot cluster mappings +pheatmap::pheatmap(to_merge,cluster_rows = F,cluster_cols = F) + + + +# Merge the cluster labels: to_merge@i gives the row (from) and the differences of to_merge@p give the column (to) of each stored entry; labels are then looked up with match() +mappings <- cbind( from = colnames(to_merge) [ to_merge@i+1 ] , + to=colnames(to_merge) [rep(x = 1:(length(to_merge@p)-1) , + times = (to_merge@p[-1] - to_merge@p[-length(to_merge@p)]) )] ) +merged_filt_clustering <- factor( mappings[match(filt_clustering,mappings[,1]),2] ) +merged_filt_mm <- sparse.model.matrix( ~ 0 + factor( merged_filt_clustering ) ) +colnames(merged_filt_mm) <- levels( factor( merged_filt_clustering ) ) +merged_filt_new_centroids2d <- as.matrix( t( t(filt_new_UMAP2) %*% merged_filt_mm ) / Matrix::colSums(merged_filt_mm)) + + + +# Plot the new clusterings +mypar() ; plot(filt_new_UMAP2,type="n") ; draw_graph( layout = filt_new_UMAP2 , graph = filt_KNN ) +points(filt_new_UMAP2, col = pal[merged_filt_clustering], cex=.5,pch = 16) +text(merged_filt_new_centroids2d, labels = rownames(merged_filt_new_centroids2d),cex=1,font=1) + + + +``` + +We can now save our object. 
+ +```{r} +obj_new <- obj[,cell_to_keep] # keep only the columns selected by cell_to_keep +obj_new@graphs <- list( KNN = filt_KNN ) # overwrite the graphs slot with a list holding only the filtered KNN graph +obj_new$clusters_use <- factor(merged_filt_clustering) # store the merged cluster labels as clusters_use +saveRDS(obj_new , "trajectory_seurat_filtered.rds") # serialise the result to this relative path +``` + + +# END + diff --git a/scripts/subsample_covid_data.Rmd b/scripts/data_processing/subsample_covid_data.Rmd similarity index 100% rename from scripts/subsample_covid_data.Rmd rename to scripts/data_processing/subsample_covid_data.Rmd