Imagroune committed on
Commit
0f8fe84
1 Parent(s): 1738458

Pushing ScienceQA version

Browse files
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4214710d3c4b31d9a89527da2c196e344c9a41fbf6e4a7e942a8a626b9e911c5
3
  size 4917078632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:837c35fe2d467993abf7e629c20e2e85e280cd827d60f01f0b82a42c592eb3ca
3
  size 4917078632
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e74788d2d95dc3174fd671a7e987fb4fb0243e25b1c8803a1fef8e084117638e
3
  size 4983443424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4be49e2abc4ce683f7a190b95871b759deeab622aa488ee6e7051aaf2ae190a
3
  size 4983443424
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24ca5c47d98179d7796291d5d4d6c2b8706f5173d2dcfc3fa57a6d394575f9fd
3
  size 932581696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51390a18b5727f7acebfc9ac9aab1d214882ad055fb8b165ea2b03136633ef49
3
  size 932581696
modeling_feynmodel.py CHANGED
@@ -1,15 +1,19 @@
1
  # modeling_feynmodel : Imed MAGROUNE / 2024 - 09
 
2
  # original code from modeling_FeynModel
 
3
  # add DaVit Vision Tower
4
  #
5
- # update generate forward function
6
  #
7
  # add lora adapters
8
  #
9
  # train on coco OD and vision reasoning
 
10
  # train on ScienceQA
11
  #
12
- # todo add mamaba layer
 
13
  #
14
  # todo train on Arc-AGI
15
 
@@ -50,7 +54,7 @@ from transformers.modeling_outputs import (
50
  from typing import List, Optional, Tuple, Union
51
 
52
  from transformers.models.gemma2.modeling_gemma2 import Gemma2Model, Gemma2ForCausalLM,Gemma2DecoderLayer,Gemma2RMSNorm
53
- from .configuration_feynmodel import FeynModelConfig,Florence2VisionConfig
54
 
55
  from transformers import AutoProcessor, AutoTokenizer, AutoModelForCausalLM
56
  import json
@@ -1330,7 +1334,7 @@ class FeynModelForCausalLM(Gemma2ForCausalLM):
1330
  inputs_embeds = self.get_input_embeddings()(input_ids)
1331
  image_features = self._encode_image(pixel_values)
1332
  inputs_embeds, causal_attention_mask = self._merge_input_ids_with_image_features(image_features, inputs_embeds )
1333
- causal_attention_mask = create_git_attention_mask(tgt=input_ids, memory=image_features,max_length=2048)
1334
  causal_attention_mask=causal_attention_mask.to(input_ids.device)
1335
  self.__causal_attention_mask=causal_attention_mask
1336
 
@@ -1495,7 +1499,7 @@ class FeynModelForCausalLM(Gemma2ForCausalLM):
1495
  if input_ids is not None:
1496
 
1497
  inputs_embeds = self.get_input_embeddings()(input_ids)
1498
- print("pixels")
1499
  image_features = self._encode_image(pixel_values)
1500
  inputs_embeds, causal_attention_mask = self._merge_input_ids_with_image_features(image_features, inputs_embeds )
1501
  causal_attention_mask = create_git_attention_mask(tgt=input_ids, memory=image_features,max_length=max_length)
 
1
  # modeling_feynmodel : Imed MAGROUNE / 2024 - 09
2
+ #
3
  # original code from modeling_FeynModel
4
+ # Use of Gemma2 Layers
5
  # add DaVit Vision Tower
6
  #
7
+ # update generate and forward function
8
  #
9
  # add lora adapters
10
  #
11
  # train on coco OD and vision reasoning
12
+ #
13
  # train on ScienceQA
14
  #
15
+ #
16
+ # add mamba layer
17
  #
18
  # todo train on Arc-AGI
19
 
 
54
  from typing import List, Optional, Tuple, Union
55
 
56
  from transformers.models.gemma2.modeling_gemma2 import Gemma2Model, Gemma2ForCausalLM,Gemma2DecoderLayer,Gemma2RMSNorm
57
+ from configuration_feynmodel import FeynModelConfig,Florence2VisionConfig
58
 
59
  from transformers import AutoProcessor, AutoTokenizer, AutoModelForCausalLM
60
  import json
 
1334
  inputs_embeds = self.get_input_embeddings()(input_ids)
1335
  image_features = self._encode_image(pixel_values)
1336
  inputs_embeds, causal_attention_mask = self._merge_input_ids_with_image_features(image_features, inputs_embeds )
1337
+ causal_attention_mask = create_git_attention_mask(tgt=input_ids, memory=image_features,max_length=8192)
1338
  causal_attention_mask=causal_attention_mask.to(input_ids.device)
1339
  self.__causal_attention_mask=causal_attention_mask
1340
 
 
1499
  if input_ids is not None:
1500
 
1501
  inputs_embeds = self.get_input_embeddings()(input_ids)
1502
+
1503
  image_features = self._encode_image(pixel_values)
1504
  inputs_embeds, causal_attention_mask = self._merge_input_ids_with_image_features(image_features, inputs_embeds )
1505
  causal_attention_mask = create_git_attention_mask(tgt=input_ids, memory=image_features,max_length=max_length)