diff --git a/llm_web_kit/extractor/html/recognizer/code/classes.py b/llm_web_kit/extractor/html/recognizer/code/classes.py index b71af41a..50276e32 100644 --- a/llm_web_kit/extractor/html/recognizer/code/classes.py +++ b/llm_web_kit/extractor/html/recognizer/code/classes.py @@ -4,13 +4,21 @@ replace_node_by_cccode from llm_web_kit.extractor.html.recognizer.recognizer import CCTag +no_code_tags = ['audio', 'td', 'span','ul', 'li', 'body', 'p', 'h1', 'h2', 'figcaption', 'figure', 'section', 'figure', 'a', 'picture', 'iframe', 'aside'] + def modify_tree(root: HtmlElement) -> None: + for maybe_code_root in root.xpath('.//*[@class]'): assert isinstance(maybe_code_root, HtmlElement) + if not any(['code' in class_name for class_name in maybe_code_root.classes]): continue - + # 应对list或者audio被识别为code的情况 + if maybe_code_root.tag in no_code_tags: + continue + if maybe_code_root.tag == 'div' and (any([child.tag in no_code_tags for child in maybe_code_root.iterchildren()]) or len([child for child in maybe_code_root.iterchildren()]) == 0): + continue if len(maybe_code_root.xpath(f'.//{CCTag.CC_CODE}')) > 0: continue @@ -20,12 +28,15 @@ def modify_tree(root: HtmlElement) -> None: def detect(root: HtmlElement) -> bool: for maybe_code_root in root.xpath('.//*[@class]'): assert isinstance(maybe_code_root, HtmlElement) + if not any(['code' in class_name for class_name in maybe_code_root.classes]): continue - + if maybe_code_root.tag in no_code_tags: + continue + if maybe_code_root.tag == 'div' and any([child.tag in no_code_tags for child in maybe_code_root.iterchildren()]): + continue if len(maybe_code_root.xpath(f'.//{CCTag.CC_CODE}')) > 0: continue - return True return False diff --git a/llm_web_kit/extractor/html/recognizer/list.py b/llm_web_kit/extractor/html/recognizer/list.py index 8ee3b136..38d57f38 100644 --- a/llm_web_kit/extractor/html/recognizer/list.py +++ b/llm_web_kit/extractor/html/recognizer/list.py @@ -124,6 +124,7 @@ def __extract_list_item_text_recusive(el: 
HtmlElement): is_sub_sup = el.tag == 'sub' or el.tag == 'sup' paragraph = [] result = {} + if el.tag == CCTag.CC_MATH_INLINE and el.text and el.text.strip(): paragraph.append({'c': f'${el.text}$', 't': ParagraphTextType.EQUATION_INLINE}) elif el.tag == CCTag.CC_CODE_INLINE and el.text and el.text.strip(): @@ -146,8 +147,11 @@ def __extract_list_item_text_recusive(el: HtmlElement): 'items': [] } for child in el.getchildren(): - child_list['items'].append(__extract_list_item_text_recusive(child)) - result['child_list'] = child_list + child_item = __extract_list_item_text_recusive(child) + if len(child_item) != 0: + child_list['items'].append(child_item) + if child_list['items']: + result['child_list'] = child_list else: if el.text and el.text.strip(): paragraph.append({'c': el.text, 't': ParagraphTextType.TEXT}) @@ -160,7 +164,8 @@ def __extract_list_item_text_recusive(el: HtmlElement): result['child_list'] = p['child_list'] # 添加子元素的文本内容 if 'c' in p: - paragraph.append({'c': p['c'], 't': p.get('t', ParagraphTextType.TEXT)}) + if p['c'] != '': + paragraph.append({'c': p['c'], 't': p.get('t', ParagraphTextType.TEXT)}) if el.tag != 'li' and el.tail and el.tail.strip(): if is_sub_sup: # 如果尾部文本跟在sub/sup后面,直接附加到最后一个文本段落中 @@ -171,9 +176,10 @@ def __extract_list_item_text_recusive(el: HtmlElement): else: paragraph.append({'c': el.tail, 't': ParagraphTextType.TEXT}) if paragraph: + # item['c'].strip(): 会导致前面处理br标签,添加的\n\n失效 result['c'] = ' '.join(normalize_text_segment(item['c'].strip()) for item in paragraph) return result - list_item_tags = ('li', 'dd', 'dt') + list_item_tags = ('li', 'dd', 'dt', 'ul', 'div', 'p') if child.tag in list_item_tags: paragraph = __extract_list_item_text_recusive(child) if len(paragraph) > 0: @@ -190,6 +196,7 @@ def __get_list_content_list(self, ele: HtmlElement, list_nest_level: int) -> lis Returns: list: 包含列表项内容的列表,即items """ + content_list = [] # 处理根元素文本 if ele.text and ele.text.strip(): diff --git 
a/llm_web_kit/extractor/html/recognizer/text.py b/llm_web_kit/extractor/html/recognizer/text.py index 4d4e1704..22cc07f9 100644 --- a/llm_web_kit/extractor/html/recognizer/text.py +++ b/llm_web_kit/extractor/html/recognizer/text.py @@ -40,6 +40,18 @@ '☁' # 云符号 ] +# 其他标点符 +other_symbols = [ + '“', + '‘', + '[', + '(', + '”', + '’', + '。', + ',' +] + PARAGRAPH_SEPARATOR = '\n\n' # 需要保留的html实体,例如:'>' 直接在markdown中无法渲染,需要替换为html实体 @@ -50,8 +62,9 @@ 'a', 'abbr', 'acronym', 'b', 'bdo', 'big', 'br', 'button', 'cite', 'code', 'dfn', 'em', 'i', 'img', 'input', 'kbd', 'label', 'map', 'object', 'q', 'samp', 'script', 'select', 'small', 'span', 'strong', 'sub', 'sup', - 'textarea', 'time', 'var', 'u', 's', 'code', 'cccode-inline', 'ccmath-inline', - 'marked-tail', 'marked-text','math','mspace' + 'textarea', 'time', 'var', 'u', 's', 'cccode-inline', 'ccmath-inline', + 'marked-tail', 'marked-text', 'math','mspace', 'font', 'nobr', 'bdi', + 'mjx-container', 'mjx-assistive-mml', 'strike', 'wbr', 'ins' } @@ -93,6 +106,7 @@ def recognize(self, base_url:str, main_html_lst: List[Tuple[HtmlElement | str, H new_html_lst = [] for html_element, raw_html_element in main_html_lst: # 如果是字符串则转换为 HtmlElement + if self.is_cc_html(html_element): new_html_lst.append((html_element, raw_html_element)) else: @@ -108,7 +122,9 @@ def __to_cctext_lst(self, lst: List[Tuple[HtmlElement | str, HtmlElement | str]] lst: List[Tuple[HtmlElement | str, HtmlElement | str]]: Element和raw_html组成的列表 """ new_lst = [] + for el, raw_html in lst: + # 如果是字符串则转换为 HtmlElement el_element = html_to_element(el) if isinstance(el, str) else el raw_html_element = html_to_element(raw_html) if isinstance(raw_html, str) else raw_html @@ -120,20 +136,45 @@ def __to_cctext_lst(self, lst: List[Tuple[HtmlElement | str, HtmlElement | str]] return new_lst def replace_entities(self, text, entities_map): - """使用正则表达式同时替换文本中的多个特定字符为其对应的HTML实体。 + """替换文本中指定字符为对应的HTML实体,但跳过HTML标签内的字符。 :param text: 需要处理的文本。 - :param entities_map: 
一个字典,键是需要替换的字符,值是对应的HTML实体名 + :param entities_map: 字典,键是要替换的字符,值是对应的HTML实体名。 :return: 替换后的文本。 """ - # 创建正则表达式模式,匹配所有需要替换的字符 - rx = re.compile('|'.join(re.escape(str(key)) for key in entities_map.keys())) + if not entities_map: + return text # 如果字典为空,直接返回原文本 + + # 构建匹配需要替换字符的正则表达式 + entities_pattern = '|'.join(re.escape(str(key)) for key in entities_map.keys()) + rx_entity = re.compile(entities_pattern) + + # 构建匹配HTML标签的正则表达式 + rx_tag = re.compile(r'<[^>]*>') - def one_xlat(match): - """回调函数,用于将匹配到的字符替换为对应的HTML实体。""" - return f'&{entities_map[match.group(0)]};' + result = [] + last_pos = 0 - return rx.sub(one_xlat, text) + # 遍历所有HTML标签 + for tag_match in rx_tag.finditer(text): + start, end = tag_match.start(), tag_match.end() + + # 提取非标签部分并进行替换 + non_tag_part = text[last_pos:start] + replaced = rx_entity.sub(lambda m: f'&{entities_map[m.group(0)]};', non_tag_part) + result.append(replaced) + + # 保留HTML标签不变 + result.append(text[start:end]) + + last_pos = end + + # 处理最后剩余的非标签部分 + non_tag_part = text[last_pos:] + replaced = rx_entity.sub(lambda m: f'&{entities_map[m.group(0)]};', non_tag_part) + result.append(replaced) + + return ''.join(result) def __combine_text(self, text1:str, text2:str, lang='en') -> str: """将两段文本合并,中间加空格. 
@@ -149,7 +190,11 @@ def __combine_text(self, text1:str, text2:str, lang='en') -> str: txt = text1 + text2 return self.replace_entities(txt.strip(), entities_map) else: - words_sep = '' if text2[0] in string.punctuation or text2[0] in special_symbols else ' ' + # 根据text1的最后一个字符和text2的第一个字符判断两个text之间的连接 + if (text2[0] in string.punctuation) or (text2[0] in special_symbols) or (text2[0] in other_symbols) or (text1 and text1[-1] in other_symbols): + words_sep = '' + else : + words_sep = ' ' txt = text1 + words_sep + text2 return self.replace_entities(txt.strip(), entities_map) @@ -169,7 +214,6 @@ def __get_paragraph_text(self, root: HtmlElement) -> List[dict]: para_text = [] def __get_paragraph_text_recusive(el: HtmlElement, text: str) -> str: - # 标记当前元素是否是sub或sup类型 is_sub_sup = el.tag == 'sub' or el.tag == 'sup' @@ -187,6 +231,8 @@ def __get_paragraph_text_recusive(el: HtmlElement, text: str) -> str: text += PARAGRAPH_SEPARATOR # TODO 这个地方直接加换行是错误点做法,需要利用数据结构来保证段落。 elif el.tag == 'sub' or el.tag == 'sup': text = process_sub_sup_tags(el, text, recursive=False) + elif el.tag == 'audio': # 避免audio被识别为paragraph + pass else: if el.text and el.text.strip(): text = self.__combine_text(text, el.text.strip()) diff --git a/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/audio_to_code_exception.html b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/audio_to_code_exception.html new file mode 100644 index 00000000..12f6f1c1 --- /dev/null +++ b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/audio_to_code_exception.html @@ -0,0 +1,395 @@ + + + + + December | 2019 | Yoga For Life Blog + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+
+
+

Monthly Archives: December 2019

+
+
+
+

+ Bonus Week: Restorative: Rest, Restore, and Renew +

+
+
+

+ + +

Greetings from YFL!!! We wish you, your family, and friends the Happiest New Year!!! 2020 will be a fantastic new decade at YFL, and we look forward to sharing our normal class scheduling, as well as many helpful and interesting ‘Special Events’. We have quite a few workshops coming up in January, February, and March, so check them out in the January YFL Newsletter, which will be released on January 1st to your email desk at 10 a.m.!!!

+

This week at YFL will be the last holiday schedule, and Deb’s reduced schedule!!! Check it out below!!!

+

This is Bonus Week, so our physical practice will be an additional time for a Restorative Flow: Rest, Restore, and Renew!!! Our Sanskrit focus is ‘Karuna Hum’, or ‘I am compassion’. The hand gesture for our practice, or mudra is the ‘Equanimity’ gesture. This promotes balance, and releases tension. It also promotes a healthy thyroid gland. Our meditation focus is ‘Compassion’, and may be contemplated, while listening to ‘Music for Meditation’.

+

This week’s Yin practice will focus on promoting ‘Good Digestion’!!!

+

As always, we cant’ wait to see you on the mat!!!

+

SCHEDULE FOR 12/30/19-1/4/20:

+

Mon-8:15 a.m. & 9:30 a.m.-w/Deb

+

Tues, Wed, Thurs-CLOSED

+

Fri-8:15 & 9:30 a.m.-w/Deb

+

Sat-9:00 a.m.-w/Deb

+

We resume our regular teaching schedule on Monday, January 13th!!!

+

‘Bless this year with love and light. Bless this year with faith and sight. Bless this year with grace and ease. Bless this year with joy and peace.’-Mary Davis

+

 

+
+ +
+
+
+

+ Classic Hatha Week (A Gentle Variation)!!! +

+
+
+

+ +

Greetings from YFL!!! This week will feature a very gentle version of ‘Classic Hatha’!!! Our Sanskrit focus is AUM Prani Dhana, or my individuality connects to Universality. Our mudra (hand gesture) is the Jnana, which connects us to Universal energy, and is also the symbol for the ‘Holy Trinity’ in Christianity. This week’s meditation focus is ‘Light’, and may be contemplated, while listening to Kiran Murti’s ‘Divine Imagination’.

+

This week’s Yin practice will provide a sequence  for releasing the hips!!!

+

This week’s schedule is abbreviated due to the holidays, so here it is:

+

Mon-8:15 & 9:30 a.m. w/Deb

+

CLOSED-Tu, Wed, Thurs

+

Fri-8:15 & 9:30 a.m.-Deb

+

Sat-9:00 a.m.-Deb

+

Sophie, Summer, and I wish you a very Merry Christmas!!!  We have many exciting workshops planned for the New Year!!! They will be announced in the January Newsletter, which will come out on your email desk, January 1st, around 10 a.m.!!!

+

As always, we can’t wait to see you on the mat!!!  Your Christmas cards & gifts are still available on Monday this week, and we will be listening to Christmas music at class on Monday!!!—Namaste’, Deb

+

‘May good fortune walk with you this day and all the days of your life’.—Unknown

+
+ +
+
+
+

+ Let’s Meet at the Wall!!! +

+
+
+

+ +

Greetings from YFL!!! It is time to meet one another at The Wall!!! We will use this amazing prop to align, strengthen, and focus during our practice!!! Our Sanskrit focus is ‘Mudita’, or gathering ‘Vicarious Joy’ from one another during the holiday season!!! Our hand gesture (mudra) is the ‘Grounding’ gesture, which will help us to connect, and rest in ‘Spirit’s Energy’ during this Christmas season!!! Deb will be playing Classic Christmas music THIS WEEK, AND NEXT!!!  Santa and Deb have a gift for you at the studio, so come to the mat this week!!! Our meditation focus is ‘JOY’, and may be contemplated, while listening to ‘White Dove’.

+

This week’s Yin practice will balance the Chakras with poses, Tibetan bowls, color, and spinal locations.

+

This week’s schedule:

+

Mon-8:15 & 9:30 a.m.-Deb

+

Tues-12:30 & 6:00 p.m.-Sophie

+

Wed-8:15 & 9:30 a.m.-Deb

+

Thurs-12:30 & 5:30 p.m.-Sophie

+

Fri-8:15 & 9:30 a.m.-Deb

+

Sat-9:00 a.m.-Deb

+

12/23-1/4 SCHEDULE:

+

OPEN: 12/23, 12/27, 12/28  CLOSED: 12/24, 12/25, 12/26

+

OPEN: 12/30, 1/3, 1/4       CLOSED: 12/31, 1/1, 1/2

+

“Your mind will always believe everything you feed it. So feed it faith, feed it positive thoughts, feed it love!!!”-Think Positive Power

+

It will be a wonderful week at YFL, and as always, we cant’ wait to see you on the mat!!!—Namaste’, Deb

+

 

+
+ +
+
+
+

+ Triple ‘R’ Week: Rest, Restore, and Renew +

+
+
+

+ +

Greetings from YFL!!! I am happy to report that I am free of my cast, and walking very slowly and steadily once again!!! I am going to stay on a reduced teaching schedule through the end of December, and then resume the normal class schedule on January 3rd!!!

+

This week’s practice is all about restoration, as we enjoy Triple ‘R’ week: Rest, Restore, and Renew!!! Our Sanskrit focus is AUM AIEEM NAMAH, which translates as, “My true nature is love and joy”. Our mudra (hand gesture) is ‘RELAX’, or let it flow!!! Interlace the fingers, let the thumbs hang freely pointing down with palms facing down.

+

Our Yin practice will focus on ‘Happy Hips’, and will emphasize poses to release and open our hip joints.

+

Deb will be handing out Christmas Cards, and gifts the week of 12/16-12/21!!! So be sure to attend class for your present!!!

+

This week’s schedule will be as follows:

+

Mon-: 8:15 & 9:30 a.m.-Deb

+

Tues: 12:30 p.m. & 6:00 p.m.-Sophie

+

Wed: 8:15 & 9:30 a.m.-Deb

+

Thurs: 12:30 & 5:30 p.m.-Sophie

+

Fri: 8:15 & 9:30 a.m.-Deb

+

Sat: 9:00 a.m.-Deb

+

In the New Year (2020): Sophie will be offering Yoga For Emotional Liberation on 1/25

+

Kim will be offering Reiki I on 1/19 & a New Moon Rising Celebration

+

Deb will be offering Back to Basics Workshop & Meditation I (TBA)

+

It will be another great week at YFL, and we are looking forward to a great New Year in 2020!!! I will review the holiday schedule again in next week’s blog!!! As always, we can’t wait to see you on the mat!!!—Namaste’, Deb

+

“All is well’…that is my new philosophy”.—Sally from Peanuts

+

 

+
+ +
+
+
+

+ ‘Weight Time’ +

+
+
+

+ +

Greetings from YFL, and Happy December!!! Don’t forget your own self-well-being this month!!! Come to the mat often!!! This week we return to Deb’s classic thematic practice for each week of the month!!! This week is ‘Weight Time’!!! We will emphasize toning, sculpting, and strengthening using our body weight and optional hand weights!!! Our Sanskrit focus is ‘Bhakti’, or ‘Your Personal Path of Devotion’, and what that means to you!!! Our hand gesture (mudra) is the classic ‘Jhana’, or connection to ‘Universal Energy’. Our meditation focus is ‘Holiday Cheer’, and the music selection this week for contemplation is ‘Winds of Samskara’s’, Heaven is Here.

+

The monthly YFL newsletter was released today, and if you aren’t on our newsletter mailing list, just email Deb, and we will gladly add you to our Contact List!!!

+

This week’s schedule:

+

Mon-8:15 & 9:30 a.m.-Deb

+

6:30 p.m. (Qigong)-Jan

+

Tues-12:30 p.m.-Sophie

+

6:00 p.m.-Sophie

+

Wed-8:15 a.m.-Sophie

+

9:30 a.m.-Joyce

+

Thurs-12:30 p.m.-Sophie

+

5:30 p.m.-Sophie

+

Fri-8:15 &9:30 a.m.-Deb

+

Sat-9:00 a.m.-Deb

+

Holiday Schedule: CLOSED: 12/24-12/26/19   &   12/31-1/2/20!!!

+

Give the gift of ‘HEALTH & WELL-BEING’ this Holiday Season!!! See Deb about YFL gift certificates!!!

+

It will be a wonderful first week of December at YFL, and as always, we cant’ wait to see you on the mat!!!—Namaste’, Deb

+

“Go confidently in the direction of your dreams”.—Henry David Thoreaux

+
+ +
+
+
+ +
+ +
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/double_ul.html b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/double_ul.html new file mode 100644 index 00000000..c694c7f9 --- /dev/null +++ b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/double_ul.html @@ -0,0 +1,285 @@ + + + + + + + + + + + Honda Civic 1.0 VTEC TURBO SR 5-Door Approved Used vehicle, by Campbell Honda Newry + + + + + + +

+ + + + + + + + + + + + a + + + + + + + + + + + + + + + + + + + + +

+
FavouritesMy ComparisonSaved Searches
+

Honda Civic 1.0 VTEC TURBO SR 5-Door

£16,295
Honda Civic 1.0 VTEC TURBO SR 5-Door
Honda Civic 1.0 VTEC TURBO SR 5-Door
Honda Civic 1.0 VTEC TURBO SR 5-Door
Honda Civic 1.0 VTEC TURBO SR 5-Door
Honda Civic 1.0 VTEC TURBO SR 5-Door
Honda Civic 1.0 VTEC TURBO SR 5-Door
Honda Civic 1.0 VTEC TURBO SR 5-Door
Honda Civic 1.0 VTEC TURBO SR 5-Door
Honda Civic 1.0 VTEC TURBO SR 5-Door
Honda Civic 1.0 VTEC TURBO SR 5-Door
Honda Civic 1.0 VTEC TURBO SR 5-Door
Honda Civic 1.0 VTEC TURBO SR 5-Door
Honda Civic 1.0 VTEC TURBO SR 5-Door
Honda Civic 1.0 VTEC TURBO SR 5-Door
Honda Civic 1.0 VTEC TURBO SR 5-Door
Honda Civic 1.0 VTEC TURBO SR 5-Door
Honda Civic 1.0 VTEC TURBO SR 5-Door
Honda Civic 1.0 VTEC TURBO SR 5-Door
Reg Number: OY67UBO
1st reg date: 2017
Exterior: Black Metallic
Mileage: 11,500 miles
Fuel Type: Petrol
Transmission: Manual
Consumption: 55 mpg
CO2 Emission: 117 g/km
Insurance: 15
+
+
+ Honda Quality Plus +
+
+
Campbell Honda
Shore Road
Newry, , BT34 3AA

+
+
+

Fitted as standard

+
+
    +
      +
    • Wheels and Tyres
    • 17In Alloy Wheels
    • Tyre Pressure Control
    • Instruments and Controls
    • Front/Rear Electric Windows
    • Satellite Navigation
    • Front/Rear Parking Aid
    • Audio/Gears Steering Wheel Mounted Controls
    • Rain Sensor
    • Speed Limiter
    • Safety
    • Electronic Brake Force Distribution
    • Anti-Lock Brakes
    • Driver, Passenger And Front Side Air Bags
    • Centre Rear Seat Belt
    • Security
    • Alarm
    • Remote Central Door Locking
    • Immobiliser
    • Exterior
    • Tinted Glass
    • Front Fog Lights
    • Electric/Heated Door Mirrors
    • LED Daytime Running Lights Exterior Lighting
    • Gear Knob Leather
    • Comfort and Convenience
    • Automatic Air Conditioning
    • Cruise Control
    • Front/Rear Armrest
    • Whiplash Protection System
    • Power-Assisted Steering
    • Heated Rear Screen
    • Map Interior Lights
    • Centre Console
    • Seating
    • Driver Seat Height Adjustment
    • Rear Seat - ISOFIX Anchorage Point
    • Driver Seat Lumbar Support
    • In Car Entertainment (ICE)
    • On-Board Monitor
    • Eight Speakers
    • Miscellaneous
    • Hill Holder
    • Safety System
    +
+
+
+
Performance
Acceleration (0-60mph)
10.9 secs
Max torque
200 nm
Max torque imperial
147.512 lb/ft
Cubic Capacity
988 cc
Power
127 PS
Economy
Litres/100 km Urban**
6.40 l/100km
Litres/100 km Extra Urban**
4.40 l/100km
Litres/100 km combined**
5.10 l/100km
Mpg urban**
44.1 mpg
Mpg extra urban**
64.2 mpg
Mpg combined**
55.4 mpg
Engine
Tank capacity
46 l
Number of cylinders
3
Dimensions
Kerb Weight
1275 kg
Vehicle Length
4518 cm
Vehicle Width
2076 cm
Vehicle Height
1434 cm
Vehicle Wheelbase
2697 mm
Environment
Co2 Emissions**
117.0 g/km
Buying you next new Honda is an exciting experience. You will be benefiting from our legendary reliability and be assured that your car is prepared to the highest standards by Honda trained technicians and supported by a full Honda guarantee. + + Honda cars offer exceptional long-term value, technical innovation, distinctive looks and character. You will also be able to choose from a range of bespoke service packages to keep your car as good as new. With around 200 dealers nationwide the network ensures outstanding customer service at your convenience. + + At any one time there are up to 8,000 used cars to choose from on this website, providing you with the best choice of Approved Honda cars anywhere.

Complimentary 5-day drive-away insurance
An optional service that starts the second you leave the dealership. This means you can enjoy driving your Honda immediately, safe in the knowledge that we’ve got everything covered.
A range of financial solutions
To help you buy the car of your dreams, we can provide flexible finance options that are tailored to your personal requirements.
12 month Honda Approved Used Car Guarantee
The guarantee, just like the new car warranty, is provided by Honda and includes Hondacare Assistance, our bespoke roadside assistance package, which provides cover in the UK and Europe. We’ll even come out to you should you have a puncture or a flat battery. You also have the option to extend your guarantee at the end of 12 months.
Rigorous checking and preparation
Our Honda ‘Multipoint’ check, carried out by Honda trained technicians, ensures that your new car meets our high quality standards. Our multi-point includes lighting equipment and instruments, steering and suspension, brakes, and even body work. + + Your car will also undergo a detailed engine check and a thorough road test for your safety and satisfaction. Finally, your car goes through an in-depth valet and polish so that it’s fully prepared when you come to drive it away.
Assured vehicle history
Your Approved used Honda is comprehensively checked to confirm; previous ownership, any outstanding finance has been settled, has incurred no major accident damage, and any registration number change are noted. You will be provided with certification on delivery of your vehicle.
Mileage certification
Any vehicle which has not been owned and used by Honda UK or our Dealer Network will have the mileage checked against previous history, including writing to previous owners to ensure that there are no mileage irregularities. You will be provided with certification on delivery of your vehicle.
Preferential MOT Test
Your Honda dealer will carry out or arrange the ANNUAL MOT TEST on your car at preferential terms in conjunction with your annual scheduled service. You will benefit from this reduced cost on the understanding that the MOT TEST is carried out by your supplying dealer.
30-day exchange policy
Your new Honda approved used car has been subject to rigorous Multi Point Pre Sales check to give you the assurance that you are purchasing an unrivalled quality car, however, we undertake that, should any mechanical or electrical defect occur within the first 30 days or 1000 miles of your ownership which cannot be rectified by an authorised Honda dealer, and providing that the car is in the same condition when purchased then your supplying dealer will exchange it for another of equivalent or greater value.* + + *Please refer to your nearest dealer for specific terms and conditions or refer to link on this site.

Finance this car

Honda Finance

Campbell Honda

Call us: + Shore Road
Newry, BT34 3AA
Get directions
Opening Image

Car Sales Opening Hours

Monday09:00 - 17:30
Tuesday09:00 - 20:00
Wednesday09:00 - 17:30
Thursday09:00 - 20:00
Friday09:00 - 17:30
Saturday09:00 - 13:00
+
+ +
+

Similar Vehicles

/
Please note some vehicles advertised on this website may have been used for business purposes (e.g. previous manufacturers vehicle, previous fleet vehicle) and/or had multiple users. Please ensure you ask the selling Dealer about your chosen vehicle(s) when making your enquiry.
Back to top
+ + \ No newline at end of file diff --git a/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/list_item_notext.html b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/list_item_notext.html new file mode 100644 index 00000000..b895eca6 --- /dev/null +++ b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/list_item_notext.html @@ -0,0 +1,819 @@ + + + + + + + + + + P Archives - BlueReg Group + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ + +
+
+ +
+ + +
+ +
+ +
+
+ + +
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/list_to_code_exception.html b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/list_to_code_exception.html new file mode 100644 index 00000000..f49021ea --- /dev/null +++ b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/list_to_code_exception.html @@ -0,0 +1,1460 @@ + + + + + + + + + M S Ramaiah Institute of Technology | MSRIT | Bangalore | Karnataka - B.Tech Admission in Top Engineering + College + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+
+
+
+

M S + Ramaiah Institute of Technology | MSRIT | Bangalore | Karnataka

+ +
+
+
+
+
+
+
+
+
+
+ +
+
+ +
+
+
+
+

Image result for ms ramaiah institute of technology logo

+

MSRIT Logo

+
+
    +
  • Accrediated by NAAC with + ‘A’ Grade
  • +
  • ISO 9001:2008 + Certified|
  • +
  • Address | MSR College Road, MSR Nagar, Bengaluru, + Karnataka 560054.
  • +
  • Ranking In + Karnataka || 04
  • +
  • Established | 1962 | Bengaluru | Karnataka + |  India
  • +
  • Affiliated | VTU       +  
  • +
  • Management | Trust
  • +
  • Admission Mode | + Comed-K & Management Quota
  • +
  • College Type | Private + Co-educational | AUTONOMUS Institute
  • +
  • Courses Offered in MSRIT | B.E/B.Tech | M.E/M.Tech | MCA | MBA | + P.hd 
  • +
  • Approved By |AICTE | + Affiliated to VTU | NBA | NAAC 
  • +
+
+

aicte-logoImage result for vtu logonaacNBA

+
+

Placements Overview

+

years |comp visited |student placed

+

2013             +  181                       +  1095
2014       +        228                   +     1275
2015       +        268                   +     1787
2016         +      358                     +   1814

+
+

M. S. Ramaiah Institute of Technology | + MSRIT

+

MSRIT has been academically autonomous for both + Undergraduate and Postgraduate Programs.While VTU will still award the degrees, the college has + academic freedom in framing its own schemes of study, curricula and student evaluation. MSRIT + offers undergraduate and graduate courses of study and research including Bachelor of Engineering, + Bachelor of Architecture, Master of Technology, Master of Business Administration and offers + programs leading to award of PhD in various disciplines.In 2013, MSRIT + was ranked 16th best engineering College + in India by India Today and Neilson.

+

+
+

Rating by National Media

+
    +
  • MSRIT was + ranked 21st in India by India Today in 2007.
  • +
  • MSRIT was + put on 3rd spot in the list of top colleges in India by national weekly Outlook in 2007.
  • +
  • The college + was ranked 22nd among private institutions in India by the weekly Outlook.
  • +
  • The college + was ranked 6th by College Khabar in 2012.
  • +
  • + Cetinformation.com rated the college as the 3rd best in Karnataka.
  • +
  • MSRIT was + ranked 16 among top 25 engineering colleges in INDIA by INDIATODAY survey according to 2013.
  • +
+
+

Engineering B.E/B.Tech Programs Offered in + MSRIT

+

Image result for ms ramaiah INSTITUTE OF TECHNOLOGY

+

B.E Architecture Engineering + | The school of architecture, MSRIT, Bangalore, + started in the year 1992. Since its establishment, the school has played a vital role in providing + quality education. The Council of Architecture and AICTE has recognized this + program.

+
    +
  • Exam + Required – NATA
  • +
  • Duration : 5 + Years, Full Time, Under Graduate Degree.
  • +
  • Eligibility + | Must have appeared for NATA conducted by the + Council of Architecture. In order to pass the Architectural Aptitude Test, a candidate must + obtain a minimum of 40% marksCandidate must have passed 10+2/Pre-University Course or equivalent + with minimum 50% of marks in aggregate and has studied Mathematics and English as compulsory + subjects .
  • +
+
+
+
+

Seat Matrix B.E | Engineering

+

revpp

+
    +
  • B.E Computer Science and + Engineering Intake : 120 |seats 
  • +
  • B.E.  Information Science and + Engineering Intake : 60 |seats
  • +
  • B.E. Electronics and Communication + Engineering Intake : 60 |seats 
  • +
  • B.E. Mechanical Engineering  Intake + : 120| seats 
  • +
  • B.E.  Civil Engineering Intake + : 60| seats 
  • +
  • B.E. Electrical and Electronics + Engineering Intake :60| seats 
  • +
  • B.E Chemical Engineering Intake : 60 + |seats 
  • +
  • B.E Industrial + Management Engineering Intake : 60 |seats 
  • +
  • B.E Medical Electronics Engineering Intake + : 60| seats 
  • +
  • B.E Biotechnology Engineering Intake + : 30| seats 
  • +
+
+
    +
  • Duration : 4 years +
  • +
  • Eligibility | Admission to undergraduate + programmes in engineering shall be open to candidates who have passed Karnataka + 2nd PUC / 12th standard or equivalent + examination recognized by AICTE and State Government in Physics and Mathematics subjects along + with Chemistry / Electronics / Biology / Biotechnology / Computer Science as optional subjects + and English as one of the languages of study. Students must have obtained a minimum of 45% + marks in aggregate (40% for SC/ST/OBC). The candidate must have also qualified in one of the + following entrance exams: CET/ COMED-K / JEE / AIEEE.
  • +
+
+

Post Graduate M.Tech ProgrammesImage result for ms ramaiah institute of technology

+
    +
  • M.Tech in Computer Science and Engineering | 18 + seats
  • +
  • M.Tech in Computer Networking | 18 seats +
  • +
  • M.Tech in Digital Electronics & Communication + | 36 seats
  • +
  • M.Tech in VLSI Design – Embedded Systems | 18 + seats
  • +
  • M.Tech in Machine Design | 18 seats
  • +
  • M.Tech in Signal Processing | 24 seats
  • +
  • M.Tech in Computer Application in Industrial Drives |18 + seats
  • +
  • M.Tech in Manufacturing Science & Engineering | 36 + seats
  • +
  • M.Tech in Computer Integrated Manufacturing. | 18 + seats
  • +
  • M.Tech in Structural Engineering  | 18 + seats
  • +
  • M.Tech in Software Engineering | 18 seats +
  • +
  • Elligibility :  PGCET or + GATE | Admission to M.Tech programmes shall be open to + candidates who have passed the Bachelor’s Degree examinations with not less than 50% marks in + the aggregate of all the semesters of the degree examinations (45% for SC/ST candidates + belonging to Karnataka). The candidate shall have passed Bachelor’s Degree as per the + requirements and must have also taken at least one of the prescribed qualifying examinations as + specified below.
  • +
+
+

Placement 

+

For + the current batch [batch of 2016], placements are still in progress. For computer science and + information science, there are amazing companies visiting the campus.Service companies/mass + recruiters pay from around 3-4 lakhs/year. These companies take in a lot of students and are usually + open to all branches.There are mid tier companies which pay from 5-8 lakhs.Then, there are high + paying companies/Tier 1 companies, offering a CTC from anywhere between 9 to 16 lakhs/year. These + companies usually pick only a handful of students.

+

The highest package this + year, till date is 50Lakhs/year. You can + read more about it in this article: Bengaluru + engineering  student lands job with Rs 50L salary – Times of IndiaSo, the + average package will be around 5-7 lakhs/year for CS students.

+

Image result for ms ramaiah INSTITUTE OF TECHNOLOGY placements

+

Top Recruiter In MSRIT

+

Image result for ms ramaiah INSTITUTE OF TECHNOLOGY placements

+
+

Department Of Placement

+
+
MSRIT is one of the preferred + academic institutes for a large number of organizations for recruiting our + graduates.
The biggest names in the + corporate world visit the campus on a regular basis, with enviable recruitment + offers.
Most of the top companies + across the nation, visit us for Campus Recruitment.
There is tremendous competition amongst companies + to visit the Campus to recruit the best talent of the institution.
Our MSRIT has been the traditional recruitment + destination for many organizations.
The institution stands as the most preferred + destination for students who aspire to grow, learn and reach beyond the ordinary, reason why some + of the world’s
+
+

msrit6

+
+
+
+
+

Top Searched + Links By ENGINEERINGENQUIRY.COM

+
    +
  • Top Medical College Karnataka | Top Medical College + Bangalore | Top Ranked College Karnataka
  • +
  • Top 20 Engineering college Bangalore | Genuine rank of + Top Engineering college
  • +
  • Top 25 Pharma college india | NIRF Ranking| 2017 + Genuine Ranking | Top Pharma Colleges India
  • +
  • Top 20 Pharmacy College Bangalore | Top Pharma + College Bangalore | Genuine Ranking
  • +
  • List Of Top Dental College Karnataka | Top 35 + Dental College Karnataka | Genuine Ranking
  • +
  • Top 20 Engineering College Pune | Genuine Ranking | + Contact
  • +
  • Top 20 MBA | PGDM | Management Colleges in Mumbai | Pune +
  • +
  • Top 20 MBA / PGDM / Management Colleges in Bangalore
  • +
  • Top 20 MBA / PGDM / Management Colleges in Delhi-NCR
  • +
  • Top 20 Computer Application BCA | MCA College Of + Bangalore | India
  • +
  • Top 25 Management College | BBA | BBM | BMS in Bangalore +
  • +
  • Top 25 Nursing Colleges Bangalore | B.sc Nursing | M.sc + Nursing | P.B Nursing
  • +
  • Top 20 + Law College Pune | Mumbai | LLB | BALLB | BBALLB
  • +
  • Top 20 Law College + Bangalore | LLB | BA LLB | BBA LLB | LLM | Ph.D
  • +
+
+
+
+
+ +
+

+

+

+

+

+ +
+
+
+
+ + + +
+
+
+ +
+
+

Leave a + Comment

+
+

+

+ +

+

+

+

+

+
+
+
+
+
+ +
+
+ +
+
+
+
Show Buttons +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Hide Buttons +
+
+
+ + +
+ + + + + + + + + + + + + + + +
+
%d bloggers like + this:
+ +
+ + + + + + + \ No newline at end of file diff --git a/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/sub_sup_exception.html b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/sub_sup_exception.html new file mode 100644 index 00000000..db72200a --- /dev/null +++ b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/sub_sup_exception.html @@ -0,0 +1,838 @@ + +
+
+
+
+
+
+

Estrous cycle-dependent changes of Fas expression in the bovine corpus luteum: influence of keratin 8/18 intermediate filaments and cytokines

+
+

+ Alice Duncan1, Jennifer Forcina1, Alyssa Birt2 and David Townson1*

+
+
+
+ +

Author Affiliations

+
+
+

+ 1 Department of Molecular, Cellular and Biomedical Sciences, University of New Hampshire, Durham, NH, USA +

+

+ 2 Department of Dairy and Animal Science, The Pennsylvania State University, University Park, PA, USA +

+
+

For all author emails, please log on. +

+
+
+
+
+
+
+
+
+

Reproductive Biology and Endocrinology 2012, 10:90  + doi:10.1186/1477-7827-10-90

+

The electronic version of this article is the complete one and can be found online at: http://www.rbej.com/content/10/1/90

+
+ + + +
Received:29 August 2012
Accepted:26 October 2012
Published:31 October 2012
+
+

+ © 2012 Duncan et al.; licensee BioMed Central Ltd.
+

+

+ This is an Open Access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/2.0), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. +

+
+
+
+
+ +

Abstract

+
+

Background

+

Fas expression and Fas-induced apoptosis are mechanisms attributed to the selective + destruction of cells of the corpus luteum (CL) during luteal regression. In certain + cell-types, sensitivity to these death-inducing mechanisms is due to the loss or cleavage + of keratin-containing intermediate filaments. Specifically, keratin 8/18 (K8/K18) + filaments are hypothesized to influence cell death in part by regulating Fas expression + at the cell surface. +

+

Methods

+

Here, Fas expression on bovine luteal cells was quantified by flow cytometry during + the early (Day 5, postovulation) and late stages (Days 16–18, postovulation) of CL + function, and the relationship between Fas expression, K8/K18 filament expression + and cytokine-induced cell death in vitro was evaluated. +

+

Results

+

Both total and cell surface expression of Fas on luteal cells was greater for early + versus late stage bovine CL (89% vs. 44% of cells for total Fas; 65% vs.18% of cells + for cell surface Fas; respectively, P<0.05, n=6-9 CL/stage). A similar increase in + the steady-state concentration of mRNA for Fas, as detected by quantitative real-time + polymerase chain reaction, however, was not observed. Transient disruption of K8/K18 + filaments in the luteal cells with acrylamide (5 mM), however, had no effect on the + surface expression of Fas (P>0.05, n=4 CL/stage), despite evidence these conditions + increased Fas expression on HepG2 cells (P<0.05, n= 3 expts). Exposure of the luteal + cells to cytokines induced cell death (P<0.05) as expected, but there was no effect + of K8/K18 filament disruption by acrylamide (P>0.05) or stage of CL (P>0.05, n= 4 + CL/stage) on this outcome. +

+

Conclusion

+

In conclusion, we rejected our null hypothesis that the cell surface expression of + Fas does not differ between luteal cells of early and late stage CL. The results also + did not support the idea that K8/K18 filaments influence the expression of Fas on + the surface of bovine luteal cells. Potential downstream effects of these filaments + on death signaling, however, remain a possibility. Importantly, the elevated expression + of Fas observed on cells of early stage bovine CL compared to late stage bovine CL + raises a provocative question concerning the physiological role(s) of Fas in the corpus + luteum, particularly during early luteal development. +

+
+
+
Keywords:
Apoptosis; Corpus Luteum; Cytokines; Cytoskeleton; Fas; Ovary

Background

+
+

The receptor molecule CD95 (Apo-1) or Fas, is considered an integral component of + immune-response mechanisms within the corpus luteum (CL) which potentially influence + luteal function. It is a member of the TNF receptor superfamily [1] and is thought of as the prototypical death receptor because when bound by Fas ligand + (FasL), cells undergo apoptosis [2]. The binding of FasL to Fas triggers trimerization of Fas receptor on the cell surface. + This complex then leads to the activation of Fas associated death domain and pro-caspase-8 + proteins. The cleavage of pro-caspase-8 signals the caspase cascade, which then leads + to the activation of pro-caspase-3 and apoptosis [3,4]. Indeed, in the cow, expression of Fas mRNA within the CL occurs throughout the luteal + phase [5], and exposure of luteal cells to FasL, induces apoptosis [5,6]. Recently, Kliem and coworkers determined Fas and FasL mRNA increase in bovine CL + within 30 min to 2 h of injecting cows with a luteolytic dose of prostaglandin F2-alpha + [7], further supporting the death-inducing role of Fas and FasL in the CL. These observations + collectively suggest Fas-induced mechanisms within the bovine CL constitute a plausible + pathway for the cell-specific death observed during luteal regression. +

+

The attractiveness of the Fas-induced death pathway in luteal regression is that it + is relatively conserved among species and it provides for the selective elimination + of cells (i.e., via apoptosis) without invoking an inflammatory response. Indeed, + regression of the CL is characterized by cells undergoing apoptosis while neighboring + cells remain unaffected [8]. The relative amount of expression of Fas on the surface of luteal cells might account + for at least some of this selectivity and specificity, but this has not been directly + evaluated in the CL. Instead, most studies to date have examined only gross expression + of Fas mRNA or FasL in luteal tissue to propose a role for the Fas-FasL system in + luteal function. In addition, potential mechanisms influencing Fas expression on the + luteal cell surface have yet to be explored. Here we speculated cytoskeletal components, + specifically intermediate filaments, regulate expression of Fas on the surface of + luteal cells, and hence lend specificity to the process of Fas-induced apoptosis of + luteal cells in the CL. +

+

The cytoskeleton of cells consists of microtubules, microfilaments, and intermediate + filaments. Intermediate filaments have a diameter ranging between 7–11 nm and consist + of a family of five different subtypes [9]. One of the subtypes is the keratin-like proteins, which are found in epithelial + tissues, including the steroidogenic cells of ovarian follicles and CL [10-16] . Keratin filaments are obligate heterodimers, forming filaments of an acidic keratin + (type I, K9-K20), and a basic keratin (type II, K1-K8) [9,17]. The more prominent types of keratin filaments found in epithelial cells include + filaments containing K7, K8, K18, and K19 [9]. In the bovine CL, K8/K18 filaments are observed in luteal cells throughout the estrous + cycle, yet their relative expression diminishes with advancing age of the tissue [16]. Functionally, K8/K18 filaments provide structural integrity to cells, but they also + influence intracellular transport mechanisms and signaling [18,19]. In particular, the expression of these filaments in certain types of epithelial + cells provides a mechanism of resistance to apoptosis. For instance, K8/K18 filaments + in hepatocytes impair cytokine receptor trafficking and cell surface expression [20-22]. Whether or not K8/K18 filaments similarly impair Fas expression on luteal cells + has not been tested. +

+

In the present study, the objective was to quantify Fas expression on bovine luteal + cells during the early developmental (Day 5, postovulation) and late functional stages + (Days 16–18, postovulation) of the CL, examine the relationship between luteal Fas + and K8/18 filament expression, and assess the susceptibility of the luteal cells to + cytokine-induced death. Our null hypothesis was that the surface expression of Fas + on luteal cells does not differ between the two stages of corpora lutea (i.e., early + vs. late stage CL). In addition, we tested whether the disruption of K8/K18 filaments + in the luteal cells increases the cell surface expression of Fas, and thus their susceptibility + to cytokine-induced apoptosis. Experimentally, cultures of bovine luteal cells from + early and late stage CL were exposed acutely to acrylamide to disrupt the K8/K18 filaments. + The effects of filament disruption on Fas expression and cytokine-induced apoptosis + were then measured. +

+
+
+

Methods

+
+

Collection of bovine corpora lutea for dissociation and Q-RTPCR

+

All animal studies described herein were approved by the UNH Institutional Animal + Care and Use Committee (IACUC# 090205). Estrous cycles of Holstein dairy cows were + monitored using transrectal ultrasonography, and corpora lutea (CL) were removed by + colpotomy at days 5 (early stage; n=6 cows) and 16–18 (late stage; n=9 cows) postovulation + (ovulation = day 0). Luteal cells obtained from CL at these two stages of luteal function + express relatively high and low amounts of keratin intermediate filaments, respectively, + based upon previous findings [16,23]. Prior to CL removal, blood samples were obtained by coccygeal venipuncture using + heparinized tubes to measure plasma progesterone concentration and verify the relative + stage of the estrous cycle. Corpora lutea and blood samples were transported to the + laboratory on ice where the CL were extracted for total RNA (described below) and + enzymatically dissociated using collagenase type I (Worthington, Lakewood, NJ) as + described previously by others [24]. Following enzymatic dissociation, the viability of the luteal cells was estimated + to be 88-93% as determined by trypan blue exclusion. The dissociated luteal cells + were then either freshly-fixed in paraformaldehyde for flow cytometric analysis, or + placed in serum-free culture for further experimentation (described below). The heparinized + blood samples from the cows were centrifuged at 2056xg for 20 min at 4°C to obtain + plasma, which was then frozen at −20°C until assayed for progesterone by radioimmunoassay + (RIA) as described previously [25]. +

+

Total RNA was isolated from the two stages of bovine CL (n= 5–7 CL/stage) using a + Quick-RNATM Mini Prep kit (Zymo Research, Irvine, CA). The total RNA was then purified from genomic + DNA contamination using RQ1 RNase-Free DNase (Promega, Madison,WI). The purified total + RNA was reverse-transcribed to synthesize cDNA using the qScript™ cDNA Synthesis Kit + (Quanta Biosciences, Gaithersburg, MD). The cDNA was then used for subsequent quantitative + real time polymerase chain reaction (Q-RTPCR) with SyBr Green detection (Quanta Biosciences, + Gaithersburg, MD). Sequence-specific primers for bovine Fas and β-actin (an internal + control gene), validated previously by Vickers et al.[26] and Taniguchi et al.[5], respectively, were as follows: +

+

Forward and reverse primers, respectively, for bovine Fas were: 5′-ATGGGCTAGAAGTGGAACAAAAC-3′ and 5′-TTCTTCCCATGACTTTGATACC-3′. Forward and reverse primers, respectively, for bovine β-actin were: 5′-GAGGATCTTCATGAGGTAGTCTGTCAGGTC-3′ and 5′-CAACTGGGACGACATGGAGAAGATCTGGCA-3′. +

+

A thermal cycler was used to conduct the Q-RTPCR with the cyclic conditions as follows: + an initial Taq activation at 95°C for 2 min, followed by 40 cycles of 95°C for 1 second, + 55°C for 30 seconds and 72°C for 30 seconds. All reactions were carried out on a 7500 + Fast Real-Time PCR System. The data were collected during the last 30 seconds of cycling + and the amplification signals of Fas transcripts were quantified using a standard + curve based upon an absolute quantitation method. The results were expressed as a + ratio of Fas relative to β-actin transcripts as the reference (i.e., internal control + gene). Melting curve analysis was performed with conditions as follows: 95°C for 15 + seconds, 60°C for 1 min, and 95°C for 15 seconds. +

+

Culture of bovine luteal cells and disruption of K8/K18 filaments with acrylamide

+

Freshly dissociated luteal cells were seeded in T25 flasks at a density of 2×106 viable cells/flask and in 8-well microchamber slides at 2×104 viable cells/well. The cells were cultured in serum-free Ham’s F12 culture medium + (Invitrogen, Carlsbad, CA) supplemented with insulin, transferrin, selenium (ITS; + 5μg/5μg/5ng/mL; Sigma Aldrich, St. Louis, MO) and gentamicin (20μg/mL; Invitrogen, + Carlsbad, CA) and incubated at 37°C, 5% CO2 in air and 95% humidity overnight. The purity of the cultures under these serum-free + conditions is estimated to be 70-75% steroidogenic cells because other types of cells + (e.g., endothelial cells, fibroblasts, etc.) are unable to persist. The day after + seeding, the flasks and chamber slides were rinsed and the conditioned medium replaced + with fresh culture medium prior to treatments. Initial treatments consisted of flasks + and chamber slides treated with either culture medium (control) or 5mM acrylamide + (Fisher Scientific, Pittsburgh, PA) for 4 h to disrupt K8/K18 filaments and potentially + increase the cell surface expression of Fas. Acrylamide is a selective, reversible, + disrupter of K8/K18 filaments in mammalian cells [27] that under short-term culture conditions does not adversely affect microtubules [28,29], organelles (e.g., mitochondria, [30]), steroid synthesis [31], or cell viability [11]. After the initial 4 h treatment period, all flasks and chamber slides were rinsed + twice and the medium replaced. Cells from several flasks were immediately prepared + for flow cytometric analysis of Fas and K8/K18 expression as described below. 
The + remaining flasks were treated with a cytokine cocktail containing bovine interferon-γ + (IFN, 200 IU/mL; R&D Systems, Minneapolis, MN), murine tumor necrosis factor-α (TNF, + 10ng/mL; US Biological, Swampscott, MA), and human recombinant soluble Fas ligand + (FasL, 50ng/mL; R&D Systems, Minneapolis, MN) with a murine monoclonal anti-6x histidine + cross-linking antibody (1mg/mL; R&D Systems, Minneapolis, MN) for 24 h to induce cell + death. Others have previously shown this mixture of cytokines is appropriate, and + necessary, to induce Fas-mediated death of bovine ovarian steroidogenic cells in vitro[5,6,26,32,33]. After 24 h incubation, the flasks were re-treated with the cytokine cocktail for + an additional 24 h, prior to assessment of cytokine-induced cell death. +

+

Cell death counts

+

Cytokine-induced cell death in the cultured luteal cells was assessed at three different + times during the experiment. The number of attached cells in five random microscopic + fields of view was counted in all of the flasks prior to cytokine treatment using + a 0.25 mm² grid (initial cell counts). At 24 and 48 h after treatment, the number of attached + cells in the flasks was again counted to estimate cell loss (post-treatment cell counts). + All five fields of view per flask were averaged and the percent cell death was determined + using the following equation: +

+
\[ \%\ \text{Cell Death} = \left( 1 - \frac{\text{Post-treatment cell counts}}{\text{Initial cell counts}} \right) \times 100 \tag{1} \]
+

Culture of HepG2 cells

+

Murine hepatocytes were among the first cells used to demonstrate that disruption + of K8/K18 filaments enhances Fas trafficking to the cell surface [20]. Here we utilized human hepatocyte carcinoma cells (HepG2 cells) to corroborate this + finding under the experimental conditions used to disrupt K8/K18 filaments in bovine + luteal cells with acrylamide. Briefly, HepG2 cells were seeded into T150 flasks at + 2×10⁶ cells/flask. The cells were cultured in Eagle’s Minimal Essential Medium (Sigma Aldrich, + St. Louis, MO) supplemented with 10% fetal bovine serum (JRH Biosciences, Lenexa, + KS) and incubated at 37°C, 5% CO₂ and 95% humidity. At approximately 70% confluency, the HepG2 cells were subcultured + in T25 flasks using approximately 1×10⁶ cells/flask. The following day, the medium was changed and the cultures were exposed + to vehicle (control) or 5mM acrylamide for 4 h. Following treatment, the cultures + were prepared for flow cytometry to assess cell surface expression of Fas. +

+

Fixation of bovine luteal cells and HepG2 cells for flow cytometric analysis

+

Luteal cells from freshly dissociated CL and from serum-free culture were used to + analyze Fas and K8/K18 filament expression by flow cytometry. For cells obtained through + dissociation of CL, approximately 1.5x106 cells/tube in 0.4mL of Ham’s F12 culture medium were centrifuged using screen-capped + tubes (Ref # 352235, BD Falcon, San Jose, CA) for 5 min at 276xg, 4°C. The screened + cells were then fixed for 2 h on ice by adding 0.4mL 2% paraformaldehyde to the cell + suspension for a final concentration of 1% paraformaldehyde. After fixation, the cells + either remained in fixative (for detection of cell surface Fas) or were rinsed twice + with PBS and then permeabilized using 70% ethanol (for detection of total Fas and + K8/K18 filament expression). Both the fixed and permeabilized cells were stored at + 4°C and −20°C, respectively, until further processed for flow cytometry. +

+

Luteal cells in serum-free culture and the HepG2 cells cultured in serum-containing + conditions were fixed in a similar manner to the freshly isolated luteal cells described + above. Briefly, the flasks of cells were rinsed twice (5 min each) with Hank’s Balanced + Salt Solution (Sigma Aldrich, St. Louis, MO), followed by two quick washes with trypsin-EDTA + (Cell Gro Mediatech, Manassas, VA). After the second trypsin-EDTA rinse, the remaining + trypsin was removed and the flasks were left for 10 min. The trypsinized cells were + then collected in Ham’s F12 culture medium containing 10% fetal bovine serum (JRH + Biosciences, Lenexa, KS), centrifuged for 5 min at 276xg, 4°C and resuspended in Ham’s + F12 culture medium without serum. As above, approximately 1.5x106 cells/tube were centrifuged using screen-capped tubes for 5 min at 276xg, 4°C. The + filtered cells were fixed for 2 h on ice in 1% paraformaldehyde and either remained + in fixative (detection of cell surface Fas; luteal and HepG2 cells) or were permeabilized + with 70% ethanol (detection of total Fas and K8/K18 filaments; luteal cells only). + Both the fixed and the permeabilized cells were stored at 4°C and −20°C, respectively, + until analyzed by flow cytometry. +

+

Flow cytometric analysis of Fas and K8/K18 expression

+

Fixed cells (i.e., luteal and HepG2) were washed twice (5 min each) with phosphate + buffered saline with 0.1% bovine serum albumin (PBS-BSA) and centrifuged at 276xg + for 5 min at 4°C between each wash. Following the second wash, the cells were stained + for Fas using a mouse anti-human Fas antibody (clone CH11; Millipore, Billerica, MA; + diluted 1:25 with PBS with 10% normal goat serum) or an identical concentration of + nonspecific, IgG1 isotype (clone MOPC-21; Sigma) as a control. The cells were incubated + in primary antibody overnight at 4°C and then washed twice (5 min each) with PBS-BSA + with spins at 276xg for 5 min at 4°C between each wash. Detection of the primary antibody + was achieved fluorescently using a goat anti-mouse Alexa 488-conjugated IgG secondary + antibody (Invitrogen, Carlsbad, CA) diluted 1:200 with PBS-BSA with 10% normal goat + serum. For detection of K8/K18, luteal cells from CL dissociation and from culture + were washed twice (5 min each) with PBS-BSA and spun at 276xg for 5 min at 4°C between + each wash. The cells were then incubated for 1 h at 37°C with a mouse anti-human K18 + FITC-conjugated antibody (clone CY-90; Sigma Aldrich, St. Louis, MO; diluted 1:100 + with PBS- BSA). Previously we have shown K18 dimerizes with K8 such that targeting + of K18 is sufficient for the detection of K8/K18 filaments in bovine luteal cells + [16]. Quantification of cells expressing Fas and K8/K18 was accomplished using a 4 color, + dual laser FACScalibur flow cytometer (Becton Dickinson Biosciences, San Jose, CA) + with a 488nm argon laser for FITC/Alexa 488 excitation. The negative controls, either + IgG1-FITC (for K18 detection) or Alexa-488 secondary antibody only (for Fas detection), + were used to set the fluorescence gating to 1% positive controls prior to analysis. + The cells were recorded on the FL-1 filter at no more than 800 events/second with + a total of 10,000 recorded events. 
Data were collected using Cell Quest (Becton Dickinson + Biosciences, San Jose, CA) and graphs of the results were generated using WinMDI 2.9 + software (Scripps Institute, La Jolla, CA). Mean fluorescence intensity (MFI), a measure + of staining intensity for each cell, was calculated using the following equation: +

+
\[ \text{MFI} = \frac{\text{Geometric mean of sample} - \text{Geometric mean of negative control}}{\text{Geometric mean of negative control}} \tag{2} \]
+

Microscopic evaluation of K8/K18 filaments and microtubules in bovine luteal cells

+

Bovine luteal cells cultured in microchamber slides were used to evaluate microscopically + the efficacy and specificity of acrylamide as a disrupter of K8/K18 filaments. The + cells were rinsed twice with PBS, fixed using 4% paraformaldehyde in PBS for 20 min + on ice, and then stored in PBS at 4°C until permeabilized with methanol and analyzed + for K8/K18 expression and microtubule expression (negative control) by fluorescent + microscopy. Briefly, the previously-fixed luteal cells were rinsed twice with PBS-BSA + followed by a 1 h block/permeabilization step with 0.3% triton x-100 in PBS containing + 10% normal goat serum (Vector Labs, Burlingame, CA) and 3% BSA. The slides were rinsed + 3 × 5 min with PBS-BSA and incubated overnight at 4°C with either a mouse anti-human + K18 monoclonal antibody (clone CY-90; Sigma Aldrich, St. Louis, MO; diluted 1:800 + in PBS-BSA with 10% normal goat serum), or a mouse anti-bovine alpha-tubulin monoclonal + antibody (clone 236–10501; Invitrogen, Carlsbad, CA; diluted 1:200 in PBS-BSA with + 10% normal goat serum). The following day, after 3 × 5 min washes with PBS-BSA, fluorescent + detection of the K18-containing filaments or tubulin-containing microtubules was achieved + by incubating the slides with a goat anti-mouse Alexa 488-conjugated IgG antibody + (K18; Invitrogen, Carlsbad, CA) or a goat anti-mouse Texas Red-conjugated antibody + (microtubules; Santa Cruz, Santa Cruz, CA). Both secondary antibodies were diluted + 1:200 in PBS-BSA with 10% normal goat serum (Vector Labs, Burlingame, CA). The slides + were counterstained with 4',6-diamidino-2-phenylindole (DAPI) mounting medium (Vector, + Burlingame, CA) and then coverslipped. +

+

Statistical analysis

+

The data were analyzed by 1-way or 2-way ANOVA followed by Tukey’s multiple comparison + test using the general linear model of Systat 12.0 (Point Richmond, CA). Results are + expressed as mean ± SEM, with each experiment repeated three to nine times (i.e., + n= 3–9). For experiments requiring cultured cells, the cells were cultured in triplicate + for a given experiment and were derived from individual CL or from a frozen stock + of cells (HepG2 cells). Thus, the total number of experiments (n=) is equivalent to + the total number of CL or frozen aliquots (HepG2) used to establish the cultures + (shown in figure legends). Differences among means at a value of P<0.05 were considered + statistically significant. +

+
+
+

Results

+
+

Fas expression is greater for bovine luteal cells of early stage CL compared to late + stage CL +

+

Freshly dissociated luteal cells from early and late stage CL were characterized for + total Fas expression (Figure 1) and expression on the cell surface (Figure 2) relative to a non-specific IgG control. Measurement of plasma progesterone revealed + the cows used to obtain early stage CL had lower systemic progesterone than cows used + for late stage CL (1.8 ± 0.2 versus 5.9 ± 0.7 ng/ml, respectively; P<0.05, n=6-9 CL/stage). + However, a higher percentage of luteal cells expressed total Fas in early stage CL + compared to late stage CL (Figure 1A, P<0.05). Mean fluorescence intensity (MFI), a measure of staining intensity for each + cell, was also higher among cells from early stage CL compared to late stage CL (Figure + 1C, P<0.05). Similarly, the expression of Fas on the cell surface was greater for cells + of early stage CL compared to late stage CL (Figure 2B, P<0.05), as was MFI (Figure 2C, P<0.05). Overall, quantification of the percentage of cells expressing Fas on the + cell surface relative to total Fas expression revealed cells from early stage CL express + the majority of Fas on the cell surface (76%), whereas less than half these cells + from late stage CL do so (47%). In terms of relative steady-state concentrations of + Fas mRNA in the luteal tissue, Q-RTPCR indicated there was no difference between early + versus late stage CL (Figure 3; P>0.05, n=5-7 CL/stage). +

+

thumbnailFigure 1. Flow cytometric analysis of total Fas in cells of early and late stage bovine CL. A representative histogram depicting the total amount of Fas detected in bovine luteal + cells of early and late stage CL is shown (Figure 1A). Relative number of cells expressing total Fas is depicted for early versus late + stage CL (Figure 1B). Relative mean fluorescence intensity (MFI) is also depicted for the two stages + of CL (Figure 1C). Values shown are mean ± SEM; different letters indicate significant differences + (P<0.05; n=6-9 CL/stage). +

+

thumbnailFigure 2. Flow cytometric analysis of Fas expression on the surface of cells of early and late + stage bovine CL. A representative histogram depicting the expression of Fas on the surface of bovine + luteal cells of early stage and late stage CL is shown (Figure 2A). Relative number of cells expressing Fas on the cell surface is depicted for early + versus late stage CL (Figure 2B). Relative mean fluorescence intensity (MFI) for the two stages of CL is also shown + (Figure 2C). Values shown are mean ± SEM; different letters indicate significant differences + (P<0.05; n=6-9 CL/stage). +

+

thumbnailFigure 3. Relative expression of Fas mRNA in bovine CL during the early and late stages of the + estrous cycle. Values shown are mean ± SEM fold-change of Fas expression (normalized using β-actin). + Different letters indicate significant differences (P<0.05; n=5-7 CL/stage). +

+

Interestingly, a comparison of Fas expression for freshly dissociated luteal cells + versus luteal cells placed in culture for 24 h revealed that culture alone substantially + increased the relative cell surface expression of Fas for cells of both early and + late stage CL. Cell surface expression of Fas increased from ~65% to ~97% as a result + of culture for cells of early stage CL (P<0.05, n=4 expts.), and from ~18% to ~66% + for cells of late stage CL (P<0.05, n=4 expts.). +

+

K8/K18 filament expression is increased in bovine luteal cells of early stage CL compared + to late stage CL +

+

A higher percentage of freshly dissociated luteal cells from early stage CL expressed + K8/K18 filaments than late stage CL (Figure 4, P<0.05). Average number of cells expressing K8/K18 filaments in early stage CL was + 46% compared to 26% for late stage CL (Figure 4B). In contrast to what was observed for cell surface expression of Fas, culture of + luteal cells for 24 h did not enhance K8/K18 expression in cells of early or late + stage CL. Relative percentage of K8/K18-positive cells was 46% vs. 49% for freshly + dissociated vs. cultured cells, respectively, in early stage CL, and was 26% vs. 23% + for freshly dissociated vs. cultured cells, respectively, in late stage CL (P>0.05, + n=4 expts., data not shown). +

+

thumbnailFigure 4. Flow cytometric analysis of K8/K18 expression in cells of early and late stage bovine + CL. A representative histogram depicting the expression of K8/K18 filaments in bovine + luteal cells of early stage and late stage CL is shown (Figure 4A). The relative number of cells expressing K8/K18 filaments is depicted for early + stage CL versus late stage CL (Figure 4B). Values shown are mean ± SEM; different letters indicate significant differences + (P<0.05; n=6-9 CL/stage). +

+

Acrylamide-induced disruption of K8/K18 filaments does not enhance cell surface expression + of Fas or cytokine-induced apoptosis +

+

Exposure of cultured bovine luteal cells to acrylamide disrupted K8/K18 filaments + without adversely affecting microtubule organization (Figure 5). Cells in control cultures exhibited extensive, filamentous networks of K8/K18 staining + (Figure 5A) that became aggregated around the perinuclear region of the cells following acrylamide + exposure (Figure 5C). Conversely, microtubule organization when compared between control and acrylamide-treated + cultures remained unaffected (Figure 5B and D, respectively). In addition, there was no observable effect of stage of CL on these + outcomes, and the acrylamide treatment overall had no effect on the number of cells + expressing K8/K18 filaments, luteal cell viability or progesterone secretion (P>0.05; + n=2-4 CL/stage, data not shown). +

+

thumbnailFigure 5. Fluorescent detection of K8/K18 filaments and microtubules in control and acrylamide-treated + cultures of bovine luteal cells. K8/K18 filaments (green fluorescence) and microtubules (red fluorescence) were immunostained + in cultured bovine luteal cells following 4 h exposure to vehicle (Control, Figure 5A and B) or 5mM acrylamide (Figure 5C and D). Cells in control cultures exhibited a filamentous, K8/K18 intermediate filament + network which spanned the cytoplasm (Figure 5A). Microtubules of these cells were similarly filamentous (red fluorescence; Figure 5B). Conversely, cells of acrylamide-treated cultures exhibited peri-nuclear aggregation + of K8/K18 filaments (Figure 5C), yet the microtubules were unaffected (Figure 5D). Magnification: 40x. +

+

Although acrylamide disrupted K8/K18 filaments, no increase in the cell surface expression + of Fas was observed for luteal cells of either stage of CL (Figure 6A-C; P>0.05). Moreover, K8/K18 filament disruption failed to enhance Fas cell surface + expression on specific cells, as reflected by the lack of change in relative MFI (Figure + 6D; P>0.05). Consistent with the observations of freshly isolated luteal cells, cultured + luteal cells of early stage CL expressed higher amounts of Fas on the surface than + cultured cells of late stage CL (Figure 6C and D; P<0.05). In contrast, disruption of K8/K18 filaments in HepG2 cells, using identical + experimental conditions to those for bovine luteal cells, increased the number of + cells expressing Fas on the cell surface (Figure 7; P<0.05). +

+

thumbnailFigure 6. Flow cytometric analysis of Fas expression on the surface of cells of early and late + stage bovine CL following K8/K18 filament disruption with acrylamide. Representative histograms depicting the expression of Fas on the surface of bovine + luteal cells of early stage and late stage CL are shown (Figure 6A and B, respectively). The relative percentage of cells expressing Fas on the cell surface + is depicted for early versus late stage CL, and for control versus acrylamide-treated + cells (Figure 6C). Relative mean fluorescence intensity (MFI) is also depicted for the two stages + of CL and the treatment conditions (Figure 6D). Values shown are mean ± SEM; different letters indicate significant differences + (P<0.05; n=4 CL/stage). +

+

thumbnailFigure 7. Flow cytometric analysis of Fas expression on the surface of HepG2 cells following + K8/K18 filament disruption with acrylamide. A representative histogram depicting the expression of Fas on the surface of HepG2 + cells is shown (Figure 7A). The relative percentage of cells expressing Fas on the cell surface is depicted + for control versus acrylamide-treated cells (Figure 7B). Values shown are mean ± SEM; different letters indicate significant differences + (P<0.05; n=3 expts.). +

+

Exposure of the cultured bovine luteal cells for 48 h to a cytokine cocktail consisting + of IFN, TNF, and FasL induced cell death, as expected, but there was no effect of + K8/K18 disruption by acrylamide (P>0.05) or stage of CL (P>0.05, n= 4 CL/stage) on + this outcome (Figure 8). Similar results were observed when the luteal cells were exposed to cytokines and + acrylamide for only 24 h (data not shown). +

+

thumbnailFigure 8. Cell death in cultured luteal cells from early and late stage bovine CL following + exposure to acrylamide and cytokines. The relative percentage of cells undergoing death after 48 h exposure to cytokines + (Cyto) is depicted for early and late stage CL (Figure 8A and B, respectively). The cultures were also exposed to a 4 h pretreatment with 5mM acrylamide + (Acryl) to disrupt K8/K18 filaments before cytokine treatment. Values shown are mean + ± SEM; different letters indicate significant differences (P<0.05; n=3 expts.). +

+
+
+

Discussion

+
+

The current study is the first to directly measure relative changes in the expression + of Fas on the surface of bovine luteal cells across the estrous cycle. The observation + of quantifiably higher Fas receptor expression on luteal cells from early stage compared + to late stage CL was unexpected and somewhat contrary to what was anticipated based + upon earlier published studies. In these studies, the investigators examined the gross + expression of Fas mRNA [5,7] and protein [34,35] in ovarian tissues, without reference to cell-specificity, and they found that Fas + increased only in CL undergoing regression. In the current study, Fas protein was + quantified for individual cells obtained from CL following tissue dissociation and + cell culture, and then analyzed using flow cytometry. Similarly, Fas mRNA expression + for the two stages of CL was measured by Q-RTPCR. The current methods are arguably + more quantitative than the mRNA detection, immunoblot analysis, and immunohistochemistry + methods described in the cited studies, but fall short of identifying specific cell + type(s) known to exist within the CL. Nevertheless, dissociation of the luteal tissue + and establishing serum-free culture conditions, as described, removes many of the + various cell types, while enriching the population of luteal steroidogenic cells. + Thus, we suggest the pattern of Fas expression observed in the current study is essentially + representative of the luteal steroidogenic cell population within the bovine CL at + the two extremes of the estrous cycle. Moreover, our observation of no measurable + difference in relative steady-state amounts of mRNA for Fas in early versus late stage + CL, as evaluated by Q-RTPCR, is consistent with a previously published study [5]. +

+

Overall, a 72% decline in the number of bovine luteal cells expressing Fas at their + cell surface, and a 59% decline in the density of Fas expressed at the cell surface + across the estrous cycle was observed. Total Fas expression (surface and intracellular) + for freshly isolated cells was higher for early stage CL than late stage CL. A similar + difference in Fas surface expression was observed for cultured luteal cells, but was + further enhanced by culture alone. Exposure of the cultured cells to the cytokine + cocktail of IFN + TNF + FasL, however, resulted in similar estimates of cell death + for both stages of CL. This indicates cultured luteal cells from both stages of CL + are equally vulnerable to cytokine-mediated cell death despite clear differences in + the cell surface expression of Fas. +

+

The observation that Fas expression is elevated on luteal cells of early stage CL + without further enhancing their susceptibility to cytokine-induced death indicates + mechanisms exist to protect the cells against Fas-induced apoptosis. For instance, + a soluble secreted isoform of Fas has been identified in other tissues that sequesters + FasL prior to binding at the target cell surface, thus preventing cell death [36-38]. This isoform of Fas lacks the transmembrane domain of wild-type Fas, causing it + to be secreted rather than expressed on the surface of cells [38]. The murine ovary expresses a soluble form of Fas, which has protective effects [36]. Thus, it is possible a soluble form of Fas exists within the bovine CL to modulate + the effect of elevated Fas expression in early stage CL as seen in the current study. + Certainly this possibility merits additional exploration. +

+

Another intrinsic “protective” mechanism of cells of early stage bovine CL might include + the expression of membrane-bound splice variants of the Fas receptor. The cytokine + TRAIL (TNF-related apoptosis-inducing ligand) for example, which is structurally similar + to FasL, binds to receptors, DR4 and DR5, yet membrane-bound decoy receptors also + exist for TRAIL. These receptors, named DcR1 and DcR2, have a cytoplasmic domain structurally + similar to DR4 and DR5, respectively, but lack the intracellular death domain necessary + for transmitting an apoptotic signal [39-41]. Recently, Sugimoto and coworkers identified a putative Fas decoy receptor, DcR3, + in granulosa cells of porcine ovaries [42]. Similar to DcR1 and DcR2, DcR3 contains an extracellular and cytoplasmic domain + similar to wild-type Fas, but lacks the intracellular death domain. Unlike soluble + Fas, the decoy receptor is expressed on the plasma membrane and retains its ability + to bind FasL, but does not induce cell death [43]. It is tempting to speculate that a decoy receptor of Fas may exist on bovine luteal + cells, explaining the high prevalence of Fas expression observed for cells of early + stage CL, but not late stage CL, in the current study. Further research is needed + to determine whether or not a Fas decoy receptor exists within the bovine ovary, and + to explore its possible role in ovarian function. +

+

Alternatively, enhanced expression of Fas on cells of early stage CL can be explained + by a non-apoptotic or even proliferative role of Fas in the early stage CL. In recent + years, diverse non-apoptotic functions of Fas have been documented [44], such as the acceleration of liver regeneration after partial hepatectomy [45], the induction of cell migration and invasiveness of apoptotic-resistant tumor cells + [46], and the stimulation of cardiomyocyte hypertrophy [47]. The ability of Fas to control the fate of the cell likely hinges on the regulation + of Fas-induced downstream signaling events, such as activation/inhibition of the ERK, + JNK, p38, and NF-κB pathways. These same pathways have suggested roles in luteal cell + function and fate [5,48-50], but their influence on the developing early CL, especially in the context of elevated + Fas expression, is unknown. Overall, the concept that Fas might facilitate development + of the CL is consistent with the premise suggested by Pate and Keyes [51], in which immune-response mechanisms exist within the ovary to abate damaging inflammatory + responses caused by dead or dying cells. In the current scenario, these cells would + arise from postovulation trauma during the initial development of the CL. +

+

In the present study, acrylamide selectively disrupted the K8/K18 filaments in the + luteal cells, but did not enhance Fas expression or otherwise influence Fas-mediated + cell death. In effect, this result did not support the concept that K8/K18 filaments + influence Fas trafficking at the cell surface. However, acrylamide causes intermediate + filaments to only partially disassemble and undergo acute dephosphorylation [30]. Dephosphorylation provokes a 50% loss of phosphate from the keratin protein which + corresponds with the morphological changes observed for intermediate filament expression + [27]. At best, the dephosphorylation event is transient, and the striking changes in intermediate + filament organization are reversible. In fact, the filaments re-establish their ‘net-like’ + organization generally within 12 h after acrylamide removal [29], and complete rephosphorylation occurs within 18 h [27]. In the current investigation, the K8/K18 filaments of bovine luteal cells were exposed + to acrylamide for only 4 h. This was sufficient time to noticeably disrupt the filaments, + but perhaps insufficient to sustain a change in Fas trafficking or in downstream signaling + that would otherwise enhance cell death. It is noteworthy, however, that these same + conditions increased Fas expression on HepG2 cells in the current study. For the time being, + we cannot reject the possibility that K8/K18 filaments influence events downstream + from Fas binding; however, it seems unlikely that the filaments directly impair Fas + expression on the cell surface as has been suggested in other studies [20-22]. +

+
+
+

Conclusions

+
+

In conclusion, the elevated expression of Fas on cells of early stage bovine CL compared + to late stage bovine CL raises a provocative question concerning the physiological + role(s) of Fas in the corpus luteum. Although there is little doubt about the apoptotic + function of this receptor during luteal regression, its purpose during early luteal + development has yet to be defined. We suggest, as others do, that a broader view of + Fas-mediated activities merits consideration, including the need to identify the signaling + components linking Fas to non-apoptotic pathways. These insights may provide new targets + to influence fertility, and treat diseases such as inflammation and cancer. +

+
+
+

Abbreviations

+
+

CL: Corpus luteum or corpora lutea; FasL: Fas ligand; K: Keratin; mRNA: messenger + RNA; mM: millimolar; PBS-BSA: Phosphate buffered saline containing bovine serum albumin; + Q-RTPCR: Quantitative real-time polymerase chain reaction. +

+
+
+

Competing interests

+
+

The authors declare that they have no competing interests.

+
+
+

Authors’ contributions

+
+

AD and JF equally carried out the cell culture studies, performed the flow cytometric + analyses, conducted the microscopy work and assisted in the preparation of the manuscript. + AB conducted the Q-RTPCR analysis. DT conceived of the study, participated in its + design and coordination, and drafted the manuscript. All authors read and approved + the final manuscript. +

+
+
+

Acknowledgements

+
+

This work was supported by National Research Initiative Competitive Grant no. 2007-35203-18074 + from the USDA National Institute of Food and Agriculture. Partial funding was provided + by the New Hampshire Agricultural Experiment Station. This is Scientific Contribution + Number 2491. The authors wish to thank Sarah Kinsman, Amanda Rivers, and Mark Townley + for their contributions to this manuscript. +

+
+
+

References

+
+
    +
  1. +

    Ashkenazi A, Dixit VM: Death receptors: signaling and modulation.

    Science 1998, 281(5381):1305-1308. PubMed Abstract | Publisher Full Text OpenURL

    +
  2. +
  3. +

    Wajant H: The Fas signaling pathway: more than a paradigm.

    Science 2002, 296(5573):1635-1636. PubMed Abstract | Publisher Full Text OpenURL

    +
  4. +
  5. +

    Nagata S: Fas ligand-induced apoptosis.

    Annu Rev Genet 1999, 33:29-55. PubMed Abstract | Publisher Full Text OpenURL

    +
  6. +
  7. +

    Scaffidi C, Fulda S, Srinivasan A, Friesen C, Li F, Tomaselli KJ, Debatin KM, Krammer PH, Peter ME: Two CD95 (APO-1/Fas) signaling pathways.

    EMBO J 1998, 17(6):1675-1687. PubMed Abstract | Publisher Full Text | PubMed Central Full Text OpenURL

    +
  8. +
  9. +

    Taniguchi H, Yokomizo Y, Okuda K: Fas-Fas ligand system mediates luteal cell death in bovine corpus luteum.

    Biol Reprod 2002, 66(3):754-759. PubMed Abstract | Publisher Full Text OpenURL

    +
  10. +
  11. +

    Pru JK, Hendry IR, Davis JS, Rueda BR: Soluble Fas ligand activates the sphingomyelin pathway and induces apoptosis in luteal + steroidogenic cells independently of stress-activated p38(MAPK).

    Endocrinology 2002, 143(11):4350-4357. PubMed Abstract | Publisher Full Text OpenURL

    +
  12. +
  13. +

    Kliem H, Berisha B, Meyer HH, Schams D: Regulatory changes of apoptotic factors in the bovine corpus luteum after induced + luteolysis.

    Mol Reprod Dev 2009, 76(3):220-230. PubMed Abstract | Publisher Full Text OpenURL

    +
  14. +
  15. +

    Carambula SF, Pru JK, Lynch MP, Matikainen T, Goncalves PB, Flavell RA, Tilly JL, Rueda BR: Prostaglandin F2alpha- and FAS-activating antibody-induced regression of the corpus + luteum involves caspase-8 and is defective in caspase-3 deficient mice.

    Reprod Biol Endocrinol 2003, 1:15. PubMed Abstract | BioMed Central Full Text | PubMed Central Full Text OpenURL

    +
  16. +
  17. +

    Moll R, Franke WW, Schiller DL, Geiger B, Krepler R: The catalog of human cytokeratins: patterns of expression in normal epithelia, tumors + and cultured cells.

    Cell 1982, 31(1):11-24. PubMed Abstract | Publisher Full Text OpenURL

    +
  18. +
  19. +

    Czernobilsky B, Moll R, Levy R, Franke WW: Co-expression of cytokeratin and vimentin filaments in mesothelial, granulosa and + rete ovarii cells of the human ovary.

    Eur J Cell Biol 1985, 37:175-190. PubMed Abstract OpenURL

    +
  20. +
  21. +

    Gall L, De Smedt V, Ruffini S: Co-expression of cytokeratins and vimentin in sheep cumulus-oocyte complexes. Alteration + of intermediate filament distribution by acrylamide.

    Dev Growth Differ 1992, 34(5):579-587. Publisher Full Text OpenURL

    +
  22. +
  23. +

    Gallicano GI, Larabell CA, McGaughey RW, Capco DG: Novel cytoskeletal elements in mammalian eggs are composed of a unique arrangement + of intermediate filaments.

    Mech Dev 1994, 45(3):211-226. PubMed Abstract | Publisher Full Text OpenURL

    +
  24. +
  25. +

    Nilsson I, Mattsson MO, Selstam G: Presence of the intermediate filaments cytokeratins and vimentin in the rat corpus + luteum during luteal life-span.

    Histochem Cell Biol 1995, 103(3):237-242. PubMed Abstract | Publisher Full Text OpenURL

    +
  26. +
  27. +

    Santini D, Ceccarelli C, Mazzoleni G, Pasquinelli G, Jasonni VM, Martinelli GN: Demonstration of cytokeratin intermediate filaments in oocytes of the developing and + adult human ovary.

    Histochemistry 1993, 99(4):311-319. PubMed Abstract | Publisher Full Text OpenURL

    +
  28. +
  29. +

    van den Hurk R, Dijkstra G, van Mil FN, Hulshof SC, van den Ingh TS: Distribution of the intermediate filament proteins vimentin, keratin, and desmin in + the bovine ovary.

    Mol Reprod Dev 1995, 41(4):459-467. PubMed Abstract | Publisher Full Text OpenURL

    +
  30. +
  31. +

    Townson DH, Putnam AN, Sullivan BT, Guo L, Irving-Rodgers HF: Expression and distribution of cytokeratin 8/18 intermediate filaments in bovine antral + follicles and corpus luteum: an intrinsic mechanism of resistance to apoptosis?

    Histol Histopathol 2010, 25(7):889-900. PubMed Abstract | Publisher Full Text OpenURL

    +
  32. +
  33. +

    Fuchs E, Weber K: Intermediate filaments: structure, dynamics, function, and disease.

    Annu Rev Biochem 1994, 63:345-382. PubMed Abstract | Publisher Full Text OpenURL

    +
  34. +
  35. +

    Singh S, Koke JR, Gupta PD, Malhotra SK: Multiple roles of intermediate filaments.

    Cytobios 1994, 77(308):41-57. PubMed Abstract OpenURL

    +
  36. +
  37. +

    Eriksson JE, Dechat T, Grin B, Helfand B, Mendez M, Pallari HM, Goldman RD: Introducing intermediate filaments: from discovery to disease.

    J Clin Invest 2009, 119(7):1763-1771. PubMed Abstract | Publisher Full Text | PubMed Central Full Text OpenURL

    +
  38. +
  39. +

    Gilbert S, Loranger A, Daigle N, Marceau N: Simple epithelium keratins 8 and 18 provide resistance to Fas-mediated apoptosis. + The protection occurs through a receptor-targeting modulation.

    J Cell Biol 2001, 154(4):763-773. PubMed Abstract | Publisher Full Text | PubMed Central Full Text OpenURL

    +
  40. +
  41. +

    Ku NO, Soetikno RM, Omary MB: Keratin mutation in transgenic mice predisposes to Fas but not TNF-induced apoptosis + and massive liver injury.

    Hepatology 2003, 37(5):1006-1014. PubMed Abstract | Publisher Full Text OpenURL

    +
  42. +
  43. +

    Marceau N, Loranger A, Gilbert S, Daigle N, Champetier S: Keratin-mediated resistance to stress and apoptosis in simple epithelial cells in + relation to health and disease.

    Biochem Cell Biol 2001, 79(5):543-555. PubMed Abstract | Publisher Full Text OpenURL

    +
  44. +
  45. +

    Ricken AM, Spanel-Borowski K, Saxer M, Huber PR: Cytokeratin expression in bovine corpora lutea.

    Histochem Cell Biol 1995, 103(5):345-354. PubMed Abstract | Publisher Full Text OpenURL

    +
  46. +
  47. +

    Pate JL, Condon WA: Effects of serum and lipoproteins on steroidogenesis in cultured bovine luteal cells.

    Mol Cell Endocrinol 1982, 28(3):551-562. PubMed Abstract | Publisher Full Text OpenURL

    +
  48. +
  49. +

    Goldberg MJ, Moses MA, Tsang PC: Identification of matrix metalloproteinases and metalloproteinase inhibitors in bovine + corpora lutea and their variation during the estrous cycle.

    J Anim Sci 1996, 74(4):849-857. PubMed Abstract | Publisher Full Text OpenURL

    +
  50. +
  51. +

    Vickers SL, Cowan RG, Harman RM, Porter DA, Quirk SM: Expression and activity of the Fas antigen in bovine ovarian follicle cells.

    Biol Reprod 2000, 62(1):54-61. PubMed Abstract | Publisher Full Text OpenURL

    +
  52. +
  53. +

    Eckert BS, Yeagle PL: Acrylamide treatment of PtK1 cells causes dephosphorylation of keratin polypeptides.

    Cell Motil Cytoskeleton 1988, 11(1):24-30. PubMed Abstract | Publisher Full Text OpenURL

    +
  54. +
  55. +

    Durham HD, Pena SD, Carpenter S: The neurotoxins 2,5-hexanedione and acrylamide promote aggregation of intermediate + filaments in cultured fibroblasts.

    Muscle Nerve 1983, 6(9):631-637. PubMed Abstract | Publisher Full Text OpenURL

    +
  56. +
  57. +

    Eckert BS: Alteration of intermediate filament distribution in PtK1 cells by acrylamide.

    Eur J Cell Biol 1985, 37:169-174. PubMed Abstract OpenURL

    +
  58. +
  59. +

    Eckert BS: Alteration of the distribution of intermediate filaments in PtK1 cells by acrylamide. + II: effect on the organization of cytoplasmic organelles.

    Cell Motil Cytoskeleton 1986, 6(1):15-24. PubMed Abstract | Publisher Full Text OpenURL

    +
  60. +
  61. +

    Shiver TM, Sackett DL, Knipling L, Wolff J: Intermediate filaments and steroidogenesis in adrenal Y-1 cells: acrylamide stimulation + of steroid production.

    Endocrinology 1992, 131(1):201-207. PubMed Abstract | Publisher Full Text OpenURL

    +
  62. +
  63. +

    Quirk SM, Harman RM, Cowan RG: Regulation of Fas antigen (Fas, CD95)-mediated apoptosis of bovine granulosa cells + by serum and growth factors.

    Biol Reprod 2000, 63(5):1278-1284. PubMed Abstract | Publisher Full Text OpenURL

    +
  64. +
  65. +

    Bowolaksono A, Nishimura R, Hojo T, Sakumoto R, Acosta TJ, Okuda K: Anti-apoptotic roles of prostaglandin E2 and F2alpha in bovine luteal steroidogenic + cells.

    Biol Reprod 2008, 79(2):310-317. PubMed Abstract | Publisher Full Text OpenURL

    +
  66. +
  67. +

    Sakamaki K, Yoshida H, Nishimura Y, Nishikawa S, Manabe N, Yonehara S: Involvement of Fas antigen in ovarian follicular atresia and luteolysis.

    Mol Reprod Dev 1997, 47(1):11-18. PubMed Abstract | Publisher Full Text OpenURL

    +
  68. +
  69. +

    Roughton SA, Lareu RR, Bittles AH, Dharmarajan AM: Fas and Fas ligand messenger ribonucleic acid and protein expression in the rat corpus + luteum during apoptosis-mediated luteolysis.

    Biol Reprod 1999, 60(4):797-804. PubMed Abstract | Publisher Full Text OpenURL

    +
  70. +
  71. +

    Komatsu K, Manabe N, Kiso M, Shimabe M, Miyamoto H: Soluble Fas (FasB) regulates luteal cell apoptosis during luteolysis in murine ovaries.

    Mol Reprod Dev 2003, 65(4):345-352. PubMed Abstract | Publisher Full Text OpenURL

    +
  72. +
  73. +

    Hughes DP, Crispe IN: A naturally occurring soluble isoform of murine Fas generated by alternative splicing.

    J Exp Med 1995, 182(5):1395-1401. PubMed Abstract | Publisher Full Text | PubMed Central Full Text OpenURL

    +
  74. +
  75. +

    Cheng J, Zhou T, Liu C, Shapiro JP, Brauer MJ, Kiefer MC, Barr PJ, Mountz JD: Protection from Fas-mediated apoptosis by a soluble form of the Fas molecule.

    Science 1994, 263(5154):1759-1762. PubMed Abstract | Publisher Full Text OpenURL

    +
  76. +
  77. +

    Pan G, Ni J, Wei YF, Yu G, Gentz R, Dixit VM: An antagonist decoy receptor and a death domain-containing receptor for TRAIL.

    Science 1997, 277(5327):815-818. PubMed Abstract | Publisher Full Text OpenURL

    +
  78. +
  79. +

    Marsters SA, Sheridan JP, Pitti RM, Huang A, Skubatch M, Baldwin D, Yuan J, Gurney A, Goddard AD, Godowski P, et al.: A novel receptor for Apo2L/TRAIL contains a truncated death domain.

    Curr Biol 1997, 7(12):1003-1006. PubMed Abstract | Publisher Full Text OpenURL

    +
  80. +
  81. +

    Sheridan JP, Marsters SA, Pitti RM, Gurney A, Skubatch M, Baldwin D, Ramakrishnan L, Gray CL, Baker K, Wood WI, et al.: Control of TRAIL-induced apoptosis by a family of signaling and decoy receptors.

    Science 1997, 277(5327):818-821. PubMed Abstract | Publisher Full Text OpenURL

    +
  82. +
  83. +

    Sugimoto M, Kagawa N, Morita M, Kume S, Wongpanit K, Jin H, Manabe N: Changes in the expression of decoy receptor 3 in granulosa cells during follicular + atresia in porcine ovaries.

    J Reprod Dev 2010, 56(4):467-474. PubMed Abstract | Publisher Full Text OpenURL

    +
  84. +
  85. +

    Jenkins M, Keir M, McCune JM: A membrane-bound Fas decoy receptor expressed by human thymocytes.

    J Biol Chem 2000, 275(11):7988-7993. PubMed Abstract | Publisher Full Text OpenURL

    +
  86. +
  87. +

    Peter ME, Budd RC, Desbarats J, Hedrick SM, Hueber AO, Newell MK, Owen LB, Pope RM, Tschopp J, Wajant H, et al.: The CD95 receptor: apoptosis revisited.

    Cell 2007, 129(3):447-450. PubMed Abstract | Publisher Full Text OpenURL

    +
  88. +
  89. +

    Desbarats J, Newell MK: Fas engagement accelerates liver regeneration after partial hepatectomy.

    Nat Med 2000, 6(8):920-923. PubMed Abstract | Publisher Full Text OpenURL

    +
  90. +
  91. +

    Barnhart BC, Legembre P, Pietras E, Bubici C, Franzoso G, Peter ME: CD95 ligand induces motility and invasiveness of apoptosis-resistant tumor cells.

    EMBO J 2004, 23(15):3175-3185. PubMed Abstract | Publisher Full Text | PubMed Central Full Text OpenURL

    +
  92. +
  93. +

    Badorff C, Ruetten H, Mueller S, Stahmer M, Gehring D, Jung F, Ihling C, Zeiher AM, Dimmeler S: Fas receptor signaling inhibits glycogen synthase kinase 3β and induces cardiac hypertrophy + following pressure overload.

    J Clin Invest 2002, 109(3):373-381. PubMed Abstract | Publisher Full Text | PubMed Central Full Text OpenURL

    +
  94. +
  95. +

    Rueda BR, Hendry IR, Ndjountche L, Suter J, Davis JS: Stress-induced mitogen-activated protein kinase signaling in the corpus luteum.

    Mol Cell Endocrinol 2000, 164(1–2):59-67. PubMed Abstract | Publisher Full Text OpenURL

    +
  96. +
  97. +

    Chen D, Fong HW, Davis JS: Induction of c-fos and c-jun messenger ribonucleic acid expression by prostaglandin + F2α is mediated by a protein kinase C-dependent extracellular signal-regulated kinase + mitogen-activated protein kinase pathway in bovine luteal cells.

    Endocrinology 2001, 142(2):887-895. PubMed Abstract | Publisher Full Text OpenURL

    +
  98. +
  99. +

    Arvisais E, Hou X, Wyatt TA, Shirasuna K, Bollwein H, Miyamoto A, Hansen TR, Rueda BR, Davis JS: Prostaglandin F2α represses IGF-I-stimulated IRS1/Phosphatidylinositol-3-Kinase/AKT + signaling in the corpus luteum: role of ERK and P70 ribosomal S6 kinase.

    Mol Endocrinol 2010, 24(3):632-643. PubMed Abstract | Publisher Full Text | PubMed Central Full Text OpenURL

    +
  100. +
  101. +

    Pate JL, Landis Keyes P: Immune cells in the corpus luteum: friends or foes?

    Reproduction 2001, 122(5):665-676. PubMed Abstract | Publisher Full Text OpenURL

    +
  102. +
+
+
+
+
+
+
+
+
+ \ No newline at end of file diff --git a/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/test_space_exception1.html b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/test_space_exception1.html new file mode 100644 index 00000000..f84a82a0 --- /dev/null +++ b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/test_space_exception1.html @@ -0,0 +1,5064 @@ + + + + + + + + + + + + + + + + + + + + Open and Shut?: The Open Access Big Deal: Back to the Future + + + + + + +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ +
+
+
+
+
+
+

Wednesday, March 28, 2018

+
+
+
+ +

+ The Open Access Big Deal: Back to the Future +

+
+
+
+
+
+ On a superficial reading open access is intended to do no more than what it says on the can: provide an internet-based scholarly communication system in which research is made available sans paywall – in other words, a system offering improved accessibility over the traditional subscription system. 
+
+
+
+
+ On a deeper reading, however, we learn that the OA movement was a response to the unsustainably high costs of the subscription system and that it was based on a conviction that open access would be a more cost-effective way of sharing research – in other words, a system offering improved affordability.
+ + In addition, it was argued, open access would be a more transparent way of doing things than the subscription-based system. 
+
Essentially, the argument went like this: If researchers paid an article-processing charge (APC) every time they wanted to publish a paper (rather than librarians paying the costs of publishing by purchasing subscriptions to large bundles of journals courtesy of the so-called Big Deal), then not only could research papers be made freely available to all, but authors would be able to make price-based decisions when choosing where to publish. 
+
+
+
+ This price transparency, argued OA advocates, would introduce market forces into scholarly publishing that are absent in the subscription system. It would also allow new open access publishers to enter the market with lower-priced products, which would help drive down prices.
+
+
+
+ In short, OA advocates promised that open access would not only provide greater accessibility but a more cost-effective scholarly communication system, thereby solving the affordability problem that has long dogged scholarly publishing. And to achieve this, they said, transparency is key.
+
+
+

+ Transparency is key

+

+

+
+
+
+ Transparency is key because in order to make price-based decisions buyers need to be able to compare prices. While APCs allow this, Big Deals do not, because with the subscription system researchers have no idea whatsoever what costs are involved, and librarians (who buy on their behalf) do not have a published price list to work from and do not know what other librarians are paying for their Big Deals, since publishers insist on non-disclosure agreements (NDAs). In such an environment pricing is opaque and everyone bar the publishers has to fly blind.
+
+
+
+ As OA developed, however, it became apparent that most researchers do not have access to the necessary funds to pay APCs, so libraries have again had to start acting as intermediaries. In doing so, however, they have found the task of managing hundreds of APC payments on behalf of researchers difficult, time-consuming and expensive. For this reason, they have struggled to cope.
+
+
+
+ At the same time, European governments, funders and university leaders have become increasingly impatient at the time it is taking to achieve widescale open access.
+
+
+
+ These two things have led to the emergence of the OA Big Deal. Here agreements are signed with legacy publishers that combine bulk journal subscription fees (as with traditional Big Deals) plus bulk OA publishing fees so that authors can publish without personally having to pay APCs. Those librarians and university leaders signing these deals have therefore come to view the OA Big Deal as the best way of transitioning to a fully OA publishing environment. And while today the OA Big Deal is more of a European issue, it looks set to become the model of choice elsewhere in the global North (also here).
+
+
+
+ As we shall see, however, there are good reasons to doubt that this strategy can provide a satisfactory outcome. 
+
+
+
+ Strikingly, it is the most vocal critics of legacy publishers and their prices (librarians and national university associations) who are promoting these deals, either because they fail to understand (or accept) the implications of what they are doing, or because they have been mesmerised by the EU’s rash  and unthought-through commitment to make all European research freely available by 2020.
+
+
+
+ It is also concerning that the negotiators of these OA Big Deals appear to have little appetite for transparency. What these agreements consist of, what they cost, and what kind of value for money they offer (or don’t offer), therefore, is generally unknown to anyone outside the small group of people taking part in the negotiations.
+
+
+
+

+ Hard to describe

+

+

+
+
+
+ Commenting on these deals last year, the CEO of OA publisher Hindawi Paul Peters said, “Describing exactly what these things are is quite difficult, both because … none of these deals looks like another one of these deals – they all have slightly different models. Universities are getting coupons they can use on things, or discounts, or everything is all bundled together.”
+
+
+
+ He added, “And apart from their being difficult to describe because of the variety, they are hard to describe because they are not public. Most of these things are covered by NDAs and the only things we hear tend to leak out via the grapevine, so it is hard to describe what is happening.”
+
+
+
+ Contributing a further level of confusion, these agreements have given rise to a rash of ambiguous euphemisms – including “transition agreement”, “offsetting arrangement”, “Read and Publish” agreement and, more recently, “Publish and Read” agreement.
+
+
+
+ What then can we say about these OA Big Deals? How do we go about differentiating them? And what are the likely implications of their use?
+
+
+
+ One obvious problem with the OA Big Deal is that it allows large legacy publishers to lock their high prices into the new OA environment, while marginalising and excluding the new-entrants that were supposed to disrupt the market. Unless something changes, therefore, the affordability problem will only be perpetuated.
+
+
+
+ And as Peters points out, it is not even clear what these things are. When I took to Twitter to ask Bernhard Mittermaier (head of the Central Library at the Jülich Research Centre and a member of a team negotiating these kinds of deals in Germany) for guidance on the term Read and Publish he replied: “To the best of my knowledge, the term ‘Read & Publish’ was introduced by Royal Society of Chemistry, though others had the concept in place earlier.”
+
+
+
+ Under this model, he explained, “An institution pays for reading (subscription fees) and can publish all its corresponding author articles as hybrid open access. This comes with no additional costs for the authors and for the institution; everything is paid upfront as a lump sum (adjustments according to the actual number of articles might occur in the following year or the next contract). Another name for that is ‘offsetting contract’.”
+
+
+
+ By contrast, he added, the model that he and his colleagues in Germany are working towards with Project DEAL is what they have dubbed ‘Publish and Read’. With this model, he explained, “We strive to pay according to the number of publications and read at no additional costs. Rationale: Publishers are paid for publishing. One can read the articles elsewhere as well. And there is no reason to pay them for both.”
+
+
+
+ This website describes the Publish and Read model as one in which all the articles by authors in eligible institutions are made open access on publication under a CC BY license (the Publish component) and eligible institutions have perpetual access to the complete e‑journal portfolio of the publisher in question (the Read component).
+
+
+
+ It seems fair to say that the Publish and Read approach is more assertive than most OA Big Deal models. And that presumably explains why it has led to a long-running standoff between German universities and Elsevier.
+
+
+
+ In the Netherlands by contrast, a less aggressive approach has been adopted. Essentially, explained Dutch OA publishing consultant Jeroen Sondervan on Twitter, the Dutch have been pursuing a Read and Publish model. This is being done under the aegis of the association of Dutch universities (VSNU).
+
+
+

+ Fundamentally flawed

+

+

+
+
+
+ But even if the more aggressive German approach were widely adopted, it would still see legacy publishers embedding themselves and their high prices into the new OA world, while elbowing aside OA publishers like Hindawi and PLOS. For this reason alone, some feel that the OA Big Deal is fundamentally flawed.
+
+
+
+ And as indicated, the current landscape is confused and confusing. Even those seeking to negotiate these deals appear sometimes to be hazy as to what exactly an OA Big Deal consists (or should consist) of.
+
+
+
+ On 15th March, for instance, the Swissuniversities (Rectors’ Conference of the Swiss Universities) published a document announcing that, as part of a national open access policy intended to make all publicly-funded research freely accessible by 2024, Switzerland plans to negotiate OA Big Deal contracts with publishers. These, it said, would consist of agreements in which [Google translation] “universities finance the publication costs and pay a fixed price for reading and downloading published articles and to pay for works without incurring additional costs for Swiss universities. This will replace the classic subscriptions whose prices vary according to the journal.”
+
+
+
+ This makes it sound like Switzerland currently buys individual journal subscriptions, which seems unlikely. But what is puzzling here is that when one enquires as to whether the Swiss are looking to emulate the assertive German approach, the more modest VSNU approach, or some other approach, one gets the impression that the negotiators either do not know what strategy they plan to pursue, or wish to keep it secret.
+
+
+
+ So, another characteristic of the OA Big Deal is that it tends to perpetuate the secrecy inherent in the traditional Big Deal. The problem is that if the details of these agreements are kept secret how will any negotiating team be able to make price-based decisions, or facilitate the emergence of market forces? They won’t know what publishers agreed with other groups, and so will find they are still flying blind. Likewise, how will taxpayers know what has been purchased in their name, and at what price?
+
+
+
+ It would seem, therefore, that OA Big Deals will simply replicate the lack of transparency endemic to the subscription Big Deal, and so fail to address the affordability problem.
+
+
+

+ Well-intentioned and worthy

+

+

+
+
+
+ Let’s be clear, those negotiating OA Big Deals are well-intentioned people and their goal is a worthy one: They want to engineer a transition to a world in which all research is freely available to everyone, which is surely “a good thing”. It is also important to note that they are keen to avoid the disastrous mistake the UK made in trying to take a leadership role in open access by agreeing to pay publishers more money to provide OA.
+
+
+
+ The UK has signally failed to achieve any meaningful transition to OA while enriching publishers further. It is also now clear that the UK approach is financially unsustainable. This is all too evident if one reads the recent Monitoring the Transition to Open Access report. As the Times Higher noted, “In 2016, a sample of 10 UK universities paid £16.1 million for subscriptions to seven of the biggest publishers, up a fifth since 2013, according to a new Universities UK report tracking the growth of open access publishing, released on 5 December. Yet these universities also spent £3.4 million on article processing charges (APCs), the fees required to publish an article open access, up from about £750,000 three years previously”.
+
+
+
+ European negotiators have therefore vowed that they will provide no new money for OA Big Deals, although whether they will succeed in this remains open to question. In any case, even if they do manage to avoid the British trap, it is highly unlikely that the costs of scholarly communication will come down as a result of these agreements. Legacy publishers are never going to voluntarily agree to deals that will lead to a fall in their profits, and by their very nature, these deals cannot unleash the kind of market forces that would be needed to compel publishers to lower their prices.
+
+
+
+ Moreover, even if the OA movement gives up one of its primary objectives (a lower-cost scholarly communication system) how will anyone not involved in the negotiations know whether any OA Big Deal does or does not involve new money and is good value? As Peters pointed out, aside from those directly involved in negotiations, no one knows what is being agreed.
+
+
+
+ There is, therefore, growing concern in the research community that those agreeing national contracts with publishers are failing to get a good deal, and enriching publishers at the expense of the research community (and thus the taxpayer). There is, for instance, now some disenchantment with the UK organisation charged with negotiating with publishers (Jisc). We shall discuss Finland and the Netherlands in a minute.
+
+
+
+ To find out what publishers are being paid for their Big Deals researchers have had to resort to making Freedom of Information (FoI) requests – a tactic pioneered in 2014 by Cambridge mathematician Timothy Gowers, and by similarly curious researchers in the US. Their findings have only led to growing anger and scepticism.
+
+
+
+ It was in recognition of this anger that last September the Association of European Research Libraries (LIBER) published the Five Principles for Negotiations with Publishers. One of these principles (headed Transparency for Licensing Deals: No Non-Disclosure) reads, “Licensing agreements should therefore be openly available. Society will not accept confidential agreements paid for with public money in the form of non-disclosure agreements.”
+
+
+
+ One question we need to ask at this point is whether OA Big Deals are any more transparent than the traditional Big Deal. This question took centre stage when, on 17th January, Elsevier announced that it had signed a deal with the FinELib consortium based at the National Library of Finland. Like all recent deals in Europe today, the Finnish agreement had an OA component. Yet even though no NDA had been signed, there was a distinct shortage of detail about what had been agreed between Elsevier and FinELib. This was all the more striking given that shortly after the announcement FinELib publicly endorsed the LIBER transparency principles.
+
+
+
+ When I asked FinELib why so few details had been made public even though no NDA had been signed I was told, “The confidentiality of an agreement is not based on whether or not there is an NDA. At least in Finland the principle of loyalty between contracting parties needs also to be taken into account.”
+
+
+
+ Needless to say, Finnish researchers were less than impressed with this explanation, and put considerable pressure on FinELib to publish more information about the agreement. This had some success, and in February FinELib agreed to publish more details, including the total price of the deal at the consortium level, the total price per consortium member, plus the text of the Elsevier agreement.
+
+
+
+ FinELib added, however, that it was not able to publish the annual prices alongside the total price due to a requirement in the Act on the Openness of Government Activities. This, it explained, forbids the publication of “documents containing information on a private business or professional secret, as well as documents containing other comparable private business information, if access would cause economic loss to the private business.”
+
+
+
+ NDAs, it seems, may not be the whole story here.
+
+
+

+ At least four problems

+

+

+
+
+
+ The Finnish agreement also graphically demonstrates how OA Big Deals lock in large legacy publishers, to their advantage and to the disadvantage of pure OA publishers and smaller companies.
+
+
+
+ Specifically, the open access “pilot” built into the FinELib agreement gives researchers a 50% discount if they opt to publish open access in a group of Elsevier journals. Finnish researchers were quick to point out these journals are predominantly hybrid OA, not pure OA. Consequently, rather than hasten a transition to OA, they complained, the deal will embed hybrid OA in the Finnish publishing landscape for the foreseeable future. This will prolong the subscription model (and so delay the transition to OA) and elbow out pure gold OA publishers.
+
+
+
+ Frustration grew when it was discovered that Helsinki University already offers a 50% discount for publishing OA in Elsevier journals, giving the publisher an even greater advantage over pure OA journals. “Elsevier is now free (and cheaper than full OA) for a researcher,” tweeted one researcher.
+
+
+
+ It seems to me, therefore, that there are at least four problems with OA Big Deals. First, even where no NDA has been signed, transparency over what has been agreed, and the costs, remains inadequate. Second, these deals often seem as likely to delay as accelerate the transition to open access, and certainly not in the way governments and funders in Europe are now keen to see. Third, they unfairly advantage large legacy publishers. Fourth, these deals are as a result unlikely to solve the affordability problem.
+
+
+
+ On a more positive note, Finnish experience suggests that if researchers are persistent they may be able to persuade negotiators to provide more transparency than has been possible historically. However, this is more to do with heightened suspicion about publishers than open access. Either way, however, many will feel it is an insufficient reason to warrant pursuing OA Big Deals.
+
+
+

+ The Dutch approach

+

+

+
+
+
+ I argue that those negotiating OA Big Deals are shy about sharing the details of the agreements they are signing. In this regard it is worth considering the activities of one of the organisations that pioneered the OA Big Deal – VSNU. The association has been active in this space since 2015 and is keen to promote its self-styled Dutch Approach.
+
+
+
+ When it started VSNU vowed that it was going to take a hard line with legacy publishers, especially Elsevier. “We are willing to pay publishers for the work they do, but Elsevier’s profit margin is approaching 40 per cent, and universities have to do the [editing] work and pay for it”, asserted Professor Meijer, then chief negotiator for VSNU and Chairman of Radboud University Nijmegen. “We aren’t going to accept it any longer.”
+
+
+
+ As part of the sabre rattling, VSNU threatened publisher boycotts, and began to reject publishers’ offers. In 2017, for instance, it turned down the deal offered by Oxford University Press (OUP).
+
+
+
+ Nevertheless, it has gone on to agree a number of OA Big Deals, including in late 2015 one with Elsevier. This came after a year-long deadlock (and is currently up for renewal). Nevertheless, when researchers saw the details of the agreement they expressed some disappointment. And there has been rumbling discontent with the Dutch approach since, not least over its lack of transparency. Again, it took FoI requests to obtain more information and in 2016 this saw details of what Dutch universities pay to scholarly publishers become publicly available (Details here; see also here).
+
+
+
+ Currently, VSNU is engaged in a new series of negotiations. In doing so it again insists that it is taking a hard-line with publishers and indeed on 12th March it announced that it had failed to reach agreement with the Royal Society of Chemistry (RSC). Speaking to ScienceGuide Koen Becking, President of Tilburg University and board member of VSNU said, [Google translate] “We have shown that a ‘no deal’ is also possible. We stand behind our commitment: we only pay for output.”
+
+
+
+ On the same day (12th March) VSNU also reported that it had renewed its agreement with Springer Nature.
+
+
+
+ Both VSNU’s announcements consisted of little more than a brief statement with no meaningful detail. While subsequent news of a deal with OUP also included a couple of self-serving quotes, there was little else, and researchers were left scratching their heads. As one perplexed scientist commented on Google+, “The article does not mention the cost of the deal – how much Netherlands pays for the deal per year?”
+
+
+
+ If the research community doesn’t even know how much money is involved how can it hope to judge the value of any OA Big Deal? And even if we are convinced by VSNU’s robust statements about beating publishers into submission, we are not ourselves able to judge whether taxpayers’ money has really been spent effectively, let alone what exactly that money has bought.
+
+
+
+ I was alerted to the Springer Nature deal by a librarian who is clearly unhappy about the way VSNU is negotiating these deals, and the abiding lack of transparency that surrounds them. Describing herself on Twitter as an open access advocate, she emailed me to ask, “Would it not be time for some questions towards VSNU on how, why and for what amount?”
+
+
+
+ Since that seemed like a good idea I took to Twitter, and invited VSNU and Springer Nature, as well as RSC, to answer some questions.
+
+
+
+ I received no response from Springer Nature.
+
+
+
+ The RSC did reply and sent me a statement about its failed negotiation with SURF (who act on behalf of VSNU) along with an invitation to schedule a conversation to discuss the RSC’s OA work more generally (I responded by offering to visit their offices).
+
+
+
+ Unfortunately, the RSC statement tells us little, although it is interesting to learn that (amongst other things) it offered a Read and Publish option to the Dutch universities. If Sondervan is right to characterise the model that VSNU is pursuing as being Read and Publish, why did it turn down RSC’s offer? We do not know. Presumably, the issue was pricing, but without more information we cannot know who is being reasonable here and who unreasonable.
+
+
+
+ In the end, only VSNU can tell us why it turned down the RSC offer, which is partly why I contacted the organisation. That said, I was mainly interested in obtaining details of the Springer Nature deal. At first, my attempt to do so met with an encouraging response. VSNU replied to my Tweet by inviting me to email my questions to them. So, I emailed over 22 questions. Yes, it was a lot, but how else are we going to know what is going on with these deals if we don’t try to drill down beneath the press releases, beneath the corporate quotes, and beneath the declamatory statements?
+
+
+

+ Disappointed

+

+

+
+
+
+ VSNU’s Spokesperson and Advisor Public Affairs Bart Pierik responded by saying (fairly) that he would need a little time to answer my questions. I was disappointed, however, when the next day I received an email from him saying, “Considering the fact that we are finalizing some more deals with publishers at this moment (we just published good news about Oxford University Press) my proposal is that we would be glad to make one Q&A in April about all of these deals.”
+
+
+
+ I was disappointed with this response not just because it would mean a delay before details of the Springer Nature deal were made public, but because it seemed self-evident to me that if my 22 questions were re-focussed and rolled into a discussion about a bunch of other agreements the details of the Springer Nature deal would get lost. As I say, it is the details of these agreements that seem to me to be vital if we are ever to establish exactly what is going on and whether these OA Big Deals offer any kind of value for money.
+
+
+
+ VSNU’s response to my questions, plus the fact that it took a series of FoI requests to find out what Dutch universities are paying publishers last time around, left me a little sceptical as to how committed to transparency the organisation is. The desirability of their being transparent seems all the more pressing given that there is some discontent with the way VSNU has been negotiating OA Big Deals. At a Couperin event held in January, for instance, Becking (along with Director of Scientific Information Provision at the Max Planck Digital Library Ralph Schimmer) faced a series of awkward questions and complaints from the audience.
+
+
+
+ Concern was expressed about pricing, about the dangers of allowing large legacy publishers to lock themselves into the new OA environment, about the consequences of this for pure OA publishers and smaller companies (and for future innovation) and about the future of scholarly publishing if OA Big Deals become the norm.
+
+
+
+ (In passing we would note that Couperin, the French consortium of academic and research institutions, recently announced that it has failed to reach agreement with Springer Nature for an OA Big Deal, and so researchers in France will lose access to the publisher’s journals on April 1st). [Update 31/03: I have been advised that this was not an OA Big Deal negotiation but (presumably) a regular Big Deal. However, as The Scientist points out, “the consortium was pushing for a reduction in subscription costs to account for the increasing proportion of open-access articles – for which authors pay an article processing fee to publish – in Springer's journals.” It’s complicated.]
+
+
+ The questions and points aired at the Couperin meeting appeared sufficiently compelling that Becking closed the session by saying [at 43m in this video], “I have two take-home messages for myself. That is to communicate better on what we are doing and why etc. etc. And secondly to invest more in new initiatives.”
+
+
+
+ But if the Couperin meeting has encouraged VSNU as an organisation to commit to greater transparency it is not immediately evident. Why did it not provide greater details when it made its recent announcements? And why did it seek to refocus and (I assume) drown out at least some of my 22 questions? I am also not aware that anything beyond the sparse announcements made in early March (and the quote from Becking in the ScienceGuide piece cited above) has been released to the public in order to clarify what VSNU has agreed with Springer Nature and OUP (and failed to agree with RSC). The organisation also seems inclined to ignore requests for information from others.
+
+
+
+ In light of my growing scepticism, on receipt of VSNU’s email I took to Twitter again to share my disappointment. “Information on the new Springer Nature/ VSNU OA Big Deal is sparse, to say the least,” I tweeted. “VSNU agreed to answer my questions, but after seeing them decided it does not want to answer any questions on its negotiations until April.”
+
+
+
+ VSNU’s Bart Pierik responded: “This is bogus (and you know it). With many negotiations going on, we offered to make one complete Q&A, as to offer all relevant Information in one place – your website.”
+
+
+
+ My response to Pierik is here.
+
+
+

+ Public money

+

+

+
+
+
+ But what about Springer? Why did it not respond to my tweets? Again, I cannot say. However, when I later suggested that the European Commissioner for Competition might be advised to look into the growing number of OA Big Deals, I did finally get a response from the publisher – in the form of a comment from Robert Boissy, Springer Nature Director of Institutional Marketing & Account Development, Americas. He said, “So at the end of the day the OA availability is less important than the path taken to the OA availability?  I respectfully disagree.”
+
+
+
+ In thinking about the implications of Boissy’s comment it occurred to me he is saying that all that matters in the transition to open access is the end-point, not the process. But surely how one arrives at an end-point often determines the nature of that end-point. Boissy appears to be saying that accessibility is all that matters, not affordability, and not transparency. So long as the world gets OA it doesn’t matter what the taxpayer has to pay for it, or whether what is paid (and what that payment buys) is made public.
+
+
+
+ Boissy’s comment would also seem to imply that it does not matter if legacy publishers are allowed to dominate the new OA landscape.
+
+
+
+ Yet history shows us the consequences of allowing large publishers to dominate scholarly communication. Despite claims that the internet levels the playing field, the arrival of the Web has seen ever greater consolidation, coupled with a routine lack of transparency, and a consequent cycle of ever-rising prices. 
+
+
+
+ As a paper published by PLOS ONE in 2015 called The Oligopoly of Academic Publishers in the Digital Era explained, “Combined, the top five most prolific publishers [now] account for more than 50% of all papers published in 2013. Disciplines of the social sciences have the highest level of concentration (70% of papers from the top five publishers), while the humanities have remained relatively independent (20% from top five publishers).”
+
+
+
+ As I see it, the new OA Big Deals can only accelerate the process of consolidation and push out (or swallow up) the (inevitably smaller) pure OA publishers. The latter, let’s recall, were supposed to disrupt the scholarly publishing market by making it more competitive, and driving down prices.
+
+
+
+ What happens when we get to the point where scholarly publishing is dominated by not five but just one company – let’s call it SNEW? Would it really be good for the research community? Would not that company be able to hold researchers (and the taxpayers that fund them) hostage?
+
+
+
+ Meanwhile, legacy publishers are moving into the workflow, analytical services, institutional repository, electronic notebook and data services areas of scholarly communication. Every part of the scholarly infrastructure is now threatened with appropriation and domination by large commercial publishers, with Elsevier leading the pack. Surely every OA Big Deal signed assists in this process of consolidation and appropriation, as does every agreement where the costs and terms are not released to the world. This is public money being spent on these companies, people. Is it really being spent wisely?
+
+
+
+ In short, the world really needs to know what is going on. I told VSNU I would get back to them in April. But on reflection, I wonder whether there is sufficient value in undertaking a Q&A if one allows the interviewee too much control over the questions and how and when they are answered. VSNU has my 22 questions. It is clearly free to answer them or not as it desires. Either way, it is now two weeks since I emailed them over, and two weeks since I proposed to RSC that I visit their office (for which I have yet to receive a response).
+
+
+
+ So, I will end by again inviting VSNU to answer my questions. By doing so they can help shine a light on this somewhat crepuscular corner of scholarly communication and demonstrate that affordability and transparency are just as important as accessibility.
+
+
+ +
+
+ +

37 comments:

+
+
+
+ + + Federico + said... +
+
+

+ Useful collection of information, thanks. Customary note that all this is about problems with (hybrid) gold OA, not with OA in general. Laws, policies and investments on green OA or self-managed OA journals don't seem affected by most of the arguments here, right?

Crosslinking http://bjoern.brembs.net/2016/04/how-gold-open-access-may-make-things-worse/ +

+
+ +
+ + + Richard Poynder + said... +
+
+

+ Thanks for commenting Federico.

I think open access is a good thing and should be encouraged.

My rule of thumb, however, is that any initiative that seeks to force researchers to embrace OA, that lacks transparency, or that advantages legacy publishers over pure OA publishers is not desirable.

Since my list of undesirables would encompass green OA mandates I guess few OA advocates would agree with me. And since I think the notion of charging APCs is a very bad idea a lot of OA advocates would take me to task for that too.

Self-managed scholar-led OA journals seem like a very good idea. The concern must be, however, that they will experience sustainability problems. But I am sure there are solutions to these problems. +

+
+ +
+ + + Gábor Makara + said... +
+
+

+ There is very little information on the underlying factors of the negotiating powers of the big publishers. At one point you say: "Would not that company be able to hold researchers (and the taxpayers that fund them) hostage?"
Here is one root cause: the body of the existing copyrighted scientific literature being held hostage! Until a solution is found to free the hostage, a lasting solution for the easily affordable OA world is difficult to imagine.
Unless there is a revolution... +

+
+ +
+ + + Leonid Schneider + said... +
+
+

+ I wonder who imposes this secrecy. Is it really the publishers? Does Springer or Elsevier say to university negotiators: either you promise to keep all under wraps, or we are not negotiating with you? There, we publishers cancel subscriptions, keep your stupid money, see how you like it?
I don't think this is likely. Rather, my guess is it is the university negotiators who decided to keep the process secret. Why on Earth?
Is it by tradition, because everything which happens in German or Swiss academia is secret by default and can never be accessed by any FOIA inquiries?
Do they really think they can outwit big publishers behind closed doors? Even if the Swiss employ a professor of negotiation and conflict management, I doubt they can intimidate Elsevier into submission, but what if they really believe it? What if they really think by keeping their cards out of public's prying eyes their hand will become stronger, as in some form of magic poker? Don't they understand that the publishers know exactly which cards the universities are holding (staying with poker comparison)?

Bringing these Big Dealings into public will in fact make the negotiations much, much easier. The publishers will panic, because this is absolutely the last thing they want. And they can't even avoid it by threatening to pull out of negotiations.
But of course where does it leave our esteemed academics with their love for confidentiality, their obsession for quiet backdoor dealings and their ivory tower arrogance towards those industry salesmen whom they deem to be failed scientists of insufficient intelligence?


+

+
+ +
+ + + Richard Poynder + said... +
+
+

+ Gábor, you are right, legacy publishers effectively own (via copyright transfer/exclusive rights) huge backfiles of research papers and are using them to extort large amounts of money from the public purse.

But it is not necessary to have a revolution I think. National governments should be looking at ways to repatriate this content and make it freely available to the world. Here are some of the models I have in mind. +

+
+ +
+ + + Richard Poynder + said... +
+
+

+ Not that the end game of nationalisation is normally to make services free. The aim is to return vital services and infrastructures to public ownership, for the benefit of all, not just shareholders. +

+
+ +
+ + + Rick Anderson (editor) + said... +
+
+

+ Leonid, I can promise you that university administrators are not the ones who put confidentiality clauses into licenses. I have negotiated scores of licenses with publishers in my career, and in not a single case did I add a confidentiality clause myself, nor am I aware of any librarian who has done so. On the contrary, we regularly try to negotiate them _out_. And in the case of public universities, we are not even allowed to agree to them (let alone add them!) unless they are carefully edited to allow the kinds of public disclosure that the law requires of us.

One reason that publishers want to impose confidentiality terms is so that they can customize the terms of their licenses (especially with regard to pricing) without fear of customers comparing notes with each other. +

+
+ +
+ +
+
+ Bernhard Mittermaier + said... +
+
+

+ Leonid Schneider, your assumptions are wrong.
It's not literally like: "Does Springer or Elsevier say to university negotiators: either you promise to keep all under wraps, or we are not negotiating with you?", but almost. Questions of secrecy are normally not at the beginning of the negotiation, but at the end (DEAL is a counter example). Once you have agreed on content and price, you receive the publishers' standard licence agreement and there the confidentiality clause is included. Then you either sign it or start arguing, in some cases quite long. For example, I still haven't signed one contract with a term 2016/2017, as of March 29th 2018. One of the reasons is a dispute about confidentiality. +

+
+ +
+ + + Leonid Schneider + said... +
+
+

+ Oh, I do not doubt it is the publishers who put in those clauses. But as Bernhard admits, the academic negotiators allow this and think this is perfectly appropriate, I presume for reasons I listed above.
They can't even imagine opening a negotiation with the statement: all will be public. Agree, or stuff your subscriptions +

+
+ +
+ + + Toby Green + said... +
+
+

+ A lack of transparency is one thing, but I think the bigger thing is bundling. Shifting the bundle from one side of the bed (subscriptions) to the other (authoring) will, to my mind, keep market forces from driving efficiency and lowering costs in scholarly communications. I wrote some thoughts on this last year to encourage some new thinking which I think is sorely needed. http://blogs.lse.ac.uk/impactofsocialsciences/2017/10/24/its-time-for-pushmi-pullyu-open-access-servicing-the-distinct-needs-of-readers-and-authors/ +

+
+ +
+ +
+
+ Charles Oppenheim + said... +
+
+

+ No question that it is the publishers that insist on the NDAs, and librarians that (reluctantly, but sadly nearly invariably) agree to them. Freedom of Information requests then fail on grounds of commercial confidentiality. Leonid is right - librarians need to be much more assertive in their negotiations with publishers.

Overall, a really interesting article. Thank you Richard! +

+
+ +
+ + + Rick Anderson (editor) + said... +
+
+

+ Leonid, I'm sorry, but you have no idea what you're talking about. As I said, I have deep experience in negotiating license agreements with publishers. I've been doing so for 25 years. I've been hired by organizations to train their employees in negotiating those licenses. I've written a book that deals substantially with the negotiation of licenses with publishers. I'll say it again: those who negotiate these contracts on the customer side do NOT typically see confidentiality clauses as "perfectly appropriate"; on the contrary, they routinely seek to negotiate confidentiality clauses out of the contracts, and we never put them in. (Bernhard is also mistaken in his assertion that confidentiality is typically introduced at the end of the negotiation process. I have never once seen that to be the case; in every license I've negotiated that involved a confidentiality clause--and that's the great majority of them--that clause has been included in the standard version, the one that is offered by the publisher at the beginning of the process. This may be different in the case of national deals in which the license itself is being written as part of the negotiation process, and that may be what Bernhard is referring to, but it is most certainly not the case when it comes to institutional licenses.) +

+
+ +
+ + + Lisa Janicke Hinchliffe + said... +
+
+

+ My confusion is - when there is no NDA - why the hesitancy (refusal) to disclose? +

+
+ +
+ +
+
+ Bernhard Mittermaier + said... +
+
+

+ Where exactly did I admit that "the academic negotiators allow this and think this is perfectly appropriate"? I've described that I'm doing just the opposite. +

+
+ +
+ +
+
+ Bernhard Mittermaier + said... +
+
+

+ Rick, I was referring to the procedures I know, where a written license is something that comes up in the end. We first speak about the content etc. and the price. Maybe that's not a good idea (definitely in the light of the experience with the 2016/2017 contract), but it's common practice. +

+
+ +
+ + + Rick Anderson (editor) + said... +
+
+

+ Bernhard -- yes, my apologies. I failed to notice that you were referring to a situation in which the pricing part of the negotiation is completely separate from the license negotiation, and comes first. In my experience, the two negotiations have always been conducted at the same time, or at least as part of the same general negotiation process. But my experience is all at the level of a single institution; I've never negotiated a consortial or national license. +

+
+ +
+ + + Kai Geschuhn + said... +
+
+

+ While not discounting the issue of transparency, there are a few clarifications I would like to make about these agreements.
First, we have to acknowledge that hybrid publishing is rampant and is now occurring outside of any central, institutional agreement; this means that there is an additional revenue stream, outside subscription revenues, flowing from research budgets via institutions to the large traditional publishers, unmonitored and unchecked. So, as a first step, these new transformational agreements allow institutions to take back control of the payment streams, which is a precondition for building a strong negotiating position for the next steps in the transition. Libraries and consortia have already begun to leverage that stronger position with the aim of achieving greater transparency (see below).

Secondly, it should be stated that these agreements bring enormous potential for accelerating the transition to open access as they take the first, essential step of moving money away from the subscription system and initiate a new publication-based agreement type. They are based on the understanding that spending money on subscriptions is not a good use of taxpayers money, as it constitutes an investment in an atavistic business model that does not serve the needs of 21st century researchers.

For further clarification, please let me refer you to two documents published by the ESAC initiative in which institutions articulated their concept of these agreements, e.g. that they are to be considered pilots which should lead to a solely publication driven model with no access based costs. And they pointed out that “Offsetting implies the opportunity to overcome dysfunctionalities as known from the current subscription system and to improve the business for scholarly publishing in terms of transparency and efficiency rather than to perpetuate it.”

Please find the ESAC documents here:
http://esac-initiative.org/offsetting/
http://esac-initiative.org/wp-content/uploads/2016/05/esac_offsetting_joint_understanding_offsetting.pdf

The first step towards transparency was to collect the article data from the agreements on the Open APC platform and, in the case of Springer Compact, relate them to the total numbers of hybrid and subscription articles in a given journals:
https://treemaps.intact-project.org/apcdata/offsetting-coverage/
The method and insights are described in this blog post:
https://intact-project.org/general/openapc/2018/03/22/offsetting-coverage/
It is right to have a closer look at the new agreements, but only focusing on transparency does not go far enough; meeting all the challenges involved with the open access transition is much more complex. +

+
+ +
+ + + Leonid Schneider + said... +
+
+

+ Now that we determined that Leonid is wrong about everything and has no clue what he is talking about, now what?
Didn't the experts just admit that unqualified public should stay out of the complicated negotiation business which is best left to professionals? It is really like with Open Access, the policies are done behind the scientists' back because what do they know.

My main point, that transparency should be a precondition for a negotiation with publishers from the beginning, must be so out there that the experts chose not to pay any attention to it.
Bernhard, as I said, being rude and putting me in my place might impress Elsevier(they describe me as "toxic individual")and your FZ Jülich director (let me just say: HBP), but how is this helpful to your DEAL at hand?
All you DEAL people keep saying is: shut up, leave this to us, we know what we are doing. I asked about the details of Elsevier editorial boycott (ie, do they still peer review?), was told to get lost, it's secret.
Well, reading Richard's post I am not so sure this secrecy is the best way.
+

+
+ +
+ + + Leonid Schneider + said... +
+
+

+ Reply to Kai Geschuhn of Max Planck Society (MPG)
Dear Kai,
not all research institutions are in the cushy position MPG is in, ie. not knowing where to put all that bombastic money MPG has. Those deals MPG does with publishers, both subscriptions and OA ones like Frontiers, is not something most other research institutions can match. They can't afford it, even if they do love OA very much.
See above Richard's point of affordability.
Speaking of hybrid OA: MPG as elite institution expects its directors to publish in hybrid journals like Nature and Cell, I believe... +

+
+ +
+ + + Richard Poynder + said... +
+
+

+ Lisa you say:

My confusion is - when there is no NDA - why the hesitancy (refusal) to disclose?

Let me respond by quoting from the article:

"When I asked FinELib why so few details had been made public even though no NDA had been signed I was told, 'The confidentiality of an agreement is not based on whether or not there is an NDA. At least in Finland the principle of loyalty between contracting parties needs also to be taken into account.'"

And on its website, FinELib cites the Finnish Act on the Openness of Government Activities to support this argument. +

+
+ +
+ +
+
+ Bernhard Mittermaier + said... +
+
+

+ Leonid, I'm sorry, I didn't mean to be rude.
You said either the publishers or the librarians don't want transparency. That is OK to say.
Then you assumed, that it's the librarians. That's OK as well, as an assumption. Personally, I would stop at that point and ask if someone has a clue.
But further to that, you speculated about the librarians' reasons and state that their reasons aren't good ones. That's weird, because you rely on assumptions. I would refrain from acting so because I felt that I would disgrace myself.

Then you received two answers of people who actually know what's the case. Why do you attack me for telling you the facts? +

+
+ +
+ +
+
+ Bernhard Mittermaier + said... +
+
+

+ Lisa and Richard, I don't understand that as well. +

+
+ +
+ +
+
+ Martin Hicks + said... +
+
+

+ Excellent analysis and discussion about a broken system. The negotiators are using other people’s money (taxpayers) to prop up an oligopoly that is making profits on a system that is no longer fit for purpose.

Taxpayers money is being used to publish research funded by taxpayers, thus the agreements should be published as a matter of duty to the public. For OA, publish the cost of the APC in the metadata for the article. This would be transparent and can be harvested.

For university libraries with subscriptions, display the cost of the subscription every time someone accesses the journal in question. Transparency! +

+
+ +
+ + + Roger Schonfeld + said... +
+
+

+ Something I have wondered is whether some of the most recent set of negotiations, through DEAL and Couperin, have seen academia offer terms that can never be accepted by the publishers. Rather than cancelling, the universities are positioned as having tried to find a middle ground. This could ultimately be more about strategic communications to faculty members than a realistic effort to reach agreement. +

+
+ +
+ + + Lisa Janicke Hinchliffe + said... +
+
+

+ I believe Roger is raising a very good point here. If that is the case though, I wonder what the next step is with this strategy ... +

+
+ +
+ + + Richard Poynder + said... +
+
+

+ Thank you for commenting Kai Geschuhn.

I am not sure why you say that I only focused on transparency. And it worries me that you seem inclined to dismiss transparency as unimportant (or not very important). It also worries me that you seem to be implying that “meeting all the challenges involved with the open access transition” is too complex to allow the wider research community and taxpayers to be properly informed about the details.

I do hear what you say about transitions, pilots and taking back control etc. But for the life of me I cannot see how what is happening right now will do anything but lock in legacy publishers and their excessive pricing to the new OA environment, for all the reasons I state in my post.

But let’s hope you get to prove me wrong! +

+
+ +
+ + + Roger Schonfeld + said... +
+
+

+ If one were pursuing such a strategy, which a DEAL leader has denied and I have accepted his denial, one would simply allow access to end without ever "cancelling," believing that paid access is no longer needed. +

+
+ +
+ + + Richard Poynder + said... +
+
+

+ To further confuse the situation, I received an email yesterday (30th March) from what I believe to be a reliable source saying that while a public announcement has been made about the deal between Springer Nature and VSNU the contract has yet to be signed. I have asked VSNU and Springer Nature for clarification. +

+
+ +
+ + + Frances Woolley + said... +
+
+

+ Richard - great article - though I have a quibble with one of your comments.

You wrote "The concern must be, however, that they [scholar led OA journals] will experience sustainability problems. But I am sure there are solutions to these problems."

How can you be sure? I'm currently president of the Canadian Economics Association, which publishes the Canadian Journal of Economics. I can't imagine how the journal would be sustainable without subscription revenue - typesetting, copyediting, hosting, management etc all costs money, many of our authors have very little research funding, and people are just not that willing to give of their time. +

+
+ +
+ + + Richard Poynder + said... +
+
+

+ 1/2

Hi Frances,

Thanks for your comment.

Let me respond to your question in this way: What I think everyone agrees on is that it costs money to publish research papers. They invariably disagree (often bitterly) about what this cost is, or should be, but I think no one argues that it is costless.

You don’t state how much it costs to run your journal, but you say you cannot imagine how it could be sustainable without subscription revenues. From this, I assume you believe that the subscription model is the most suitable one for scholarly journals (although I think your journal does offer a $3,000 OA option).

By contrast, most OA advocates believe that scholarly journals should abandon subscriptions and fund themselves by means of author fees or, increasingly, by means of institutions bulk buying APCs courtesy of OA Big Deals.

Given the benefits that open access provides, I think it makes sense to redirect money from subscription-based publishing to OA publishing, with the aim of eventually moving to an all-OA environment. And this is what we see happening today.

However, I believe that both the subscription and APC models cost the taxpayer more than is necessary, or warranted. To my mind, therefore, neither of these models is entirely satisfactory.

I also believe it should be a serious concern that those without access to APC funding (an issue you highlight) – either as individuals or courtesy of an institutional OA Big Deal – will find it increasingly difficult to get published as scholarly communication moves closer and closer to an all-OA environment.

It is for this reason (amongst others) that unhappiness with the APC model has grown, and led to a new interest in OA journals that charge neither authors nor readers but fund themselves by other means. These are normally referred to as platinum OA journals, and one example is Discrete Analysis.

Possible sources of revenue for platinum OA journals include educational grants, sponsorship, government funding and charitable donations etc. The challenge such journals face, however, is that these kinds of revenue streams are less secure than subscriptions and APCs. Grants run out, and sponsorship and donations are also generally time-limited. As such, platinum journals inevitably have to confront sustainability issues.

On the other hand, of course, these journals can be less costly to run. Discrete Analysis, for instance, is an overlay journal (and so piggybacks on arXiv) and costs just $10 per submission. This cost is currently covered by a grant from Cambridge University, so there is no charge to the author, no charge to the reader, and the running costs are very low. +

+
+ +
+ + + Richard Poynder + said... +
+
+

+ 2/2

But while I acknowledge there are sustainability issues, I think there are solutions. Here is one thought: No one I think disagrees that a lot of money is spent on scholarly communication each year. As such, the money is already in the system. It is increasingly inadequate, but since publishers are overpricing for their services, and the source of much of this money is the taxpayer, the responsible thing to do is to seek to lower costs.

The problem is that in order to achieve a fast transition to OA the money is increasingly being redirected not to new, low-cost, innovative OA journals like Discrete Analysis, but to legacy publishers. The latter are not only far too expensive, but they lack any incentive to lower their prices, not least because the dominant ones have voracious shareholders (or venture capitalists) to feed.

Thus, the issue becomes one of how one shifts the money already in the system away from expensive legacy publishers to new low-cost, innovative OA solutions.

That governments and funders are now willing to do this is evidenced by the current trend for funders to create their own publishing platforms, as we have seen with Wellcome Open Research, Gates Open Research and HRB Open Research. Most recently, the European Commission has published a call for tenders to develop Open Research Europe.

However, the latter is being done at the same time as the research community is funnelling more and more money to legacy publishers via traditional Big Deals and new non-transparent, clearly overly expensive, OA Big Deals.

The outcome must surely be that costs will increase rather than reduce. Moreover, the money needed to develop better, cheaper, more innovative scholarly publishing solutions could dry up in order to feed the legacy publishers.

And since this is allowing legacy publishers to embed themselves, and their excessive prices, into the new OA environment it is hard to see how the affordability problem the research community faces can be resolved.

Moreover, once the legacy publishers are embedded in the new system it will be far more difficult to recover the situation and resolve the affordability problem. Better by far to seize the nettle today, and stop signing OA Big Deals. +

+
+ +
+ + + Frances Woolley + said... +
+
+

+ Thanks for these thoughtful comments.

I think in Canada the hope is that the government will be able to fund non-profit gold open access publishers through government grants to publishers and journals. The idea is government grants to journals and publishers, plus research funds, plus the work that is done on a voluntary basis anyways (like reviewing, writing), plus potentially redirection of funds from libraries to researchers (though this is not explicitly on the table), plus a 12 month embargo periods would be enough to make OA feasible (see e.g. https://www.calj-acrs.ca/news/reminder-important-and-urgent-update-future-sshrc-funding-journals)

What makes it particularly interesting in Canada, however, is that we're a small research community highly integrated into a very large one. If our journal, the CJE, was to move to gold-standard open access, the gainers could be all of the libraries who don't have to pay for a CJE subscription any more (I said "could" rather than "will" because given the non-transparency in journal pricing, it's not obvious that would happen). Most of those libraries are outside Canada. There's no mechanism for them to compensate the CJE. Likewise, since most of the journals Canadian libraries subscribe to are based outside the country, a unilateral move towards open access by Canadian journals isn't going to free up a lot of library funds, so there's no funds there to compensate Canadian researchers. The Big Deal might seem like the solution here - your post explains very convincingly why it isn't.

Economics is also a unique discipline. It's strongly hierarchical, so much so that publications in low-ranked journals have *negative* reputational effects. Status is everything, and a journal's publisher signals its status. If CJE was to move to a non-profit Canadian open access publisher, would we still be taken as seriously? Perhaps the answer to that question is yes, but it's a big risk to take.

Economics content also has science-level publication costs (lots of tables, figures, equations, to typeset, authors who need a lot of copy editing) without science-level research funding. This makes the government grants+voluntary scholarly contributions model more challenging.

I get that there are huge problems with the status quo. But sometimes I wonder if the best solution isn't something far more radical - like getting rid of journals entirely, and use some other mechanism to assess the quality of content e.g. blog comments, author's reputation, conference presentations, grant funding, external rating systems by, e.g., the academic equivalent of Roger Ebert or Rotten Tomatoes, etc. +

+
+ +
+ + + Lorraine Estelle + said... +
+
+

+ Negotiating national deals with journal publishers is tough! The negotiations are not like a haggle in the market place. There is no alternative vendor for unique journal content and no alternative (ideal) vendor for the publication of an OA Gold Article. Library consortia have responded to a growth in Gold OA, by negotiating offset deals. While by no means perfect, without the deals, the cost would be significantly more.
In my experience, library consortia always resist NDAs. However, publicly discussing the details of a negotiation is not professional and would not necessarily work in the best interest of a consortium's members.
Please also note that library consortia via ICOLC http://icolc.net/ do share information and support each other. +

+
+ +
+ + + Richard Poynder + said... +
+
+

+ What I find odd here is that the Program Manager Open Access at VSNU responded to this post by saying that she couldn't agree with me more about the need for transparency in OA Big Deals.

Yet VSNU has not responded to my last email and appears to have made no attempt answer the questions I sent to them.

I find it very hard, therefore, to believe that there is any real commitment to transparency at VSNU. +

+
+ +
+ + + Björn Brembs + said... +
+
+

+ One of the big Elephants in the room has not been mentioned so far (or I missed it, sorry in that case): already now, when they only concern reading, dropping subscriptions has been very difficult. With walking away not being an option, publishers have essentially been able to charge what they want. That's when the only issue with a lack of subscriptions is a few additional keystrokes, or an email to the author.

With Big Deals for *publishing* the issues ensuing with no contract become not being able to publish, i.e., no funding, no promotion, no tenure, no job. In other words, libraries will be in an even worse negotiation position than now. Faculty really would literally set the library on fire if they canceled a *publishing* deal.

Why would libraries willingly do this to themselves? +

+
+ +
+ + + Richard Poynder + said... +
+
+

+ You raise a good point Björn, and as I recall it was a point made at the Couperin meeting earlier this year.

One of the questions I put to VSNU that I have received no answer to was this:

What happens if an organisation like VSNU agrees one of these OA Big Deals with a large legacy publisher and then when it comes up for renewal cannot agree on pricing for the new one. Much has been made of the fact that researchers can get access to journal articles if a subscription Big Deal is not renewed, but what happens if an OA Big Deal fails? Researchers will presumably struggle to pay to publish their papers and so are more vulnerable? +

+
+ +
+ + + Richard Poynder + said... +
+
+

+ Demands for greater transparency in Big Deals with scholarly publishers continue to grow: Finnish researchers have made a Freedom of Information (FOI) request to FinELib, in accordance with the Finnish Freedom of Information Act (“Julkisuuslaki”).

"In particular, we request the full contract texts and the total cost information per subscribing institution and per year, as specified in the contracts for all recent deals whose terms are starting from the beginning of 2018. These include, at least, the deals with Emerald (journal package) and IEEE (IEL database) and with Wiley-Blackwell (the 2014 Full Collection), American Chemical Society (journal package), OVID (LWW journal package) and Springer (SpringerCompact journal package)." +

+
+ +
+
+ + +
+
+
+
+
+ + Newer Post + + + Older Post + + Home +
+
+
+ +
+
+
+
+
+
+ +
+
+
+
+ +
+
+
+
+ +
+ +
+
+
+
+
+
+
+
+ +
+ +
+
+
+
+
+
+
+
+ + + + \ No newline at end of file diff --git a/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/test_space_exception2.html b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/test_space_exception2.html new file mode 100644 index 00000000..7cff35e3 --- /dev/null +++ b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/test_space_exception2.html @@ -0,0 +1,500 @@ + + + + Fingerprint patterns | Tholath's Weblog + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+

Archive for Fingerprint patterns

+ +
+

Fingerprint Patterns

+ Posted in Uncategorized with tags on October 24, 2009 by tholath +
+

a_fingerprint

+

I do swear by the Day of Resurrection.  I do swear by the reproachful soul. Does man assume that We can never assemble his bones? Yes indeed, we are able to put together in perfect order the very tips of his fingers” – (Quran 75: 1-4)

+


+

+

+

A fingerprint is an impression of the friction ridges on all parts of the finger. A friction ridge is a raised portion of the epidermis on the palmar (palm) or digits (fingers and toes) or plantar (sole) skin, consisting of one or more connected ridge units of friction ridge skin. These are sometimes known as “epidermal ridges” which are caused by the underlying interface between the dermal papillae of the dermis and the interpapillary (rete) pegs of the epidermis. These epidermal ridges serve to amplify vibrations triggered when fingertips brush across an uneven surface, better transmitting the signals to sensory nerves involved in fine texture perception. The ridges assist in gripping rough surfaces, as well as smooth wet surfaces.

+

Fingerprints may be deposited in natural secretions from the eccrine glands present in friction ridge skin (secretions consisting primarily of water) or they may be made by ink or other contaminants transferred from the peaks of friction skin ridges to a relatively smooth surface such as a fingerprint card. The term fingerprint normally refers to impressions transferred from the pad on the last joint of fingers and thumbs, though fingerprint cards also typically record portions of lower joint areas of the fingers (which are also used to make identifications).

+

The patterns

+

Fingerprints of a suspected or an arrested person are taken by the Police. This is to facilitate identification of the person, or use it to trace the involvement of the person in the crime. Fingerprints are the most conclusive means of personal identification of humans. The purpose behind the systematic storage is to enable easy and quick retrieval of the subject’s fingerprints. Sir Francis Galton was instrumental in identifying the patterns and their significance. It was Sir Edward Richard Henry, who made the distinction between the nine different patterns and described them. Fundamentally fingerprints are in four groups. They are:

+

Arches.
+ • Plain Arch [A]
+ • Tented Arch [T]

+

Loops.
+ • Radial Loop [R]
+ • Ulnar Loop [U]

+

Whorls [W]
+ (a) Spiral; (b) Concentric.

+

Composites.
+ • Twinned Loop [TL or S]
+ • Central Pocket Loop [CP ]
+ • Lateral Pocket Loop [LP or S]
+ • Accidental [X]

+

+

ARCHES: Arches are fingerprint patterns in which the ridges traverse from one side to the other, without recurving. They are further sub-classified as Plain and Tented Arch.

+

Plain Arch [A]

+

Plain Arch [A]

+

Plain Arch: This type of arch has ridges flowing smoothly from one side of the pattern to the other, with a slight uprising in the middle region. There are no other features in this type of pattern. They are denoted by the Alphabet [A].They are denoted by the Alphabet [A].

+

Tented Arch [T]

+

Tented Arch [T]

+

Tented Arch: the ridges in this pattern flow from one side of the finger to the other with a sharp uprising in the middle. The core or the mid region of the pattern contains a pole like ridge, which is similar to the pole of a tent. Hence, they are known as Tented Arch. They are denoted by the Alphabet [T].

+

LOOPS: Loops are patterns where at least one ridge flows from one side of the finger to another, to traverse and recurve to the side of its origin. It is shaped like a hairpin. Loops have one delta only. There are two types of loops. They are Radial and Ulnar Loops.

+

Radial Loop [R]: the ridges originate from the side of the radial bone of the forearm and exit in the same side.

+

Ulnar loop [U]: the ridges originate from the side of the little finger and exits in the same side.
+ In order to distinguish between Ulnar and radial loops you must:

+

1) Know from which hand the loop pattern comes from and;
+ 2) place your hand palm side down over top of the impression and determine if the recurving ridges originate from the little finger side or the thumb side.

+

If the ridges flow in from the little finger side this would be an ‘Ulnar’ loop. If the ridges flow in from the thumb side this would be a ‘radial’ loop.

+

loop1

+

loop2

+

Loop Patterns

+


+

+

WHORLS: whorls are patterns where the ridges recurve several times to form either a spiral, or concentric circles around the central part of the pattern, called the core. They have two deltas (the delta point is a pattern of a fingerprint that resembles the Greek letter delta. It’s the point on a friction ridge at or nearest to the point of divergence of two type lines) on either side. The whorl pattern consists of one or more free recurving ridges and two points of delta. When the line of the fingerprint disc is placed on the two points of delta, it will bisect at least one of the ridges belonging to the core group. They are all designated by the alphabet [W].

+

whorl1

+

Whorl

+

FINGERPRINT

+

COMPOSITES: These patterns are a combination of two of the above patterns. They have two deltas.

+

Central Pocket Loop

+

Central Pocket Loop

+

Lateral Pocket Loop

+

Lateral Pocket Loop

+

Twinned Loop

+

Twinned Loop

+

+

Accidental

+

Accidental

+

+

Twinned Loop (T.L / S):

+

In this type, one loop is found intertwined with another. The point of origin and exit of one loop are different from that of the other. Two deltas can be seen in this pattern.

+

Double loop:

+

In this pattern, two loops originate and end at the same point. Here too there are two deltas.

+

Central Pocket (C.P.):

+

In Central pocket loops, the majority of the ridges take the form of a loop, but one or more ridges recurve at the core to form a pocket. Two deltas are present in this pattern.

+

Lateral pocket (L.P.):

+

When the ridges constituting the loop bend sharply downwards on one side before recurving, thereby forming on that side an inter-space or ‘pocket’, usually filled by the ridges of another loop, the impression is termed a lateral pocket loop. The core is placed laterally and there are two deltas.

+

Accidental (X):

+

If the impression is too irregular to be classified in any of the above categories, it is known as Accidental. Here there may be more than two deltas.

+

References:

+ +
+ +
+ +
+ + +
+ + + + + + +
+
+ +
+ Follow +
+

+
+

Get every new post delivered to your Inbox.

+ +

+ + + + + +

+
+ +
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html_data_input.jsonl b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html_data_input.jsonl index 86da0703..bd3a85ae 100644 --- a/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html_data_input.jsonl +++ b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html_data_input.jsonl @@ -88,3 +88,10 @@ {"track_id": "test_math_namespace", "dataset_name": "test_math_namespace", "url": "https://www.mdpi.com/1424-8220/14/3/5536","data_source_category": "HTML", "path":"math_miss_namespace.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}} {"track_id": "test_math_paragraph", "dataset_name": "test_math_paragraph", "url": "https://worldscientific.com/author/Wang%2C+Zhaojie?ConceptID=130214&ConceptID=130213&startPage=&ContribRaw=Zhang%2C+Jinbao","data_source_category": "HTML", "path":"math_extra_line_breaks.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}} {"track_id": "test_csnd_none_formular", "dataset_name": "test_csnd_none_formular", "url": "https://blog.csdn.net/m0_66248056/article/details/142639406?ops_request_misc=%257B%2522request%255Fid%2522%253A%25221c25a5e5a991ed02124823f82305fd4c%2522%252C%2522scm%2522%253A%252220140713.130102334..%2522%257D&request_id=1c25a5e5a991ed02124823f82305fd4c&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~all~top_click~default-2-142639406-null-null.142^v102^pc_search_result_base4&utm_term=%E5%BE%AE%E7%A7%AF%E5%88%86&spm=1018.2226.3001.4187","data_source_category": "HTML", "path":"math_csdn_none_formula.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}} +{"track_id": "ee0a113d-bdd5-48cf-ac9c-a7cadfbfb74b", "dataset_name": "CC", "url": 
"http://campbellhonda-newry.usedcars.honda.co.uk/en/used-cars/approved-cars/honda/civic-10-vtec-turbo-sr-5-door/details-r1bsa77","data_source_category": "HTML", "path":"double_ul.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}} +{"track_id": "87da087c-8168-4796-9f8c-c493d4b2a6a5", "dataset_name": "CC", "url": "https://poynder.blogspot.com/2018/03/the-open-access-big-deal-back-to-future.html?showComment=1522409938835","data_source_category": "HTML", "path":"test_space_exception1.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}} +{"track_id": "c3ab8231-764e-4144-abd4-694f305e005b", "dataset_name": "CC", "url": "https://tholath.wordpress.com/tag/fingerprint-patterns/","data_source_category": "HTML", "path":"test_space_exception2.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}} +{"track_id": "0b3d75b1-734f-4268-9ef9-c8dc1f6b2786", "dataset_name": "CC", "url": "https://yogaforlife06.com/blog/2019/12/","data_source_category": "HTML", "path":"audio_to_code_exception.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}} +{"track_id": "cd7f9462-d159-40bf-af2d-b322f30033e2", "dataset_name": "CC", "url": "https://www.engineering-admissions.com/m-s-ramaiah-institute-of-technology-msrit-bangalore-karnataka/","data_source_category": "HTML", "path":"list_to_code_exception.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}} +{"track_id": "8ec3baea-f43f-4f57-bde4-b67507bd56c1", "dataset_name": "CC", "url": "http://www.rbej.com/content/10/1/90","data_source_category": "HTML", "path":"sub_sup_exception.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}} +{"track_id": "8061e636-31c3-4cfa-ac1a-ad1a2c38360c", 
"dataset_name": "CC", "url": "https://blue-reg.com/categorie/p/","data_source_category": "HTML", "path":"list_item_notext.html", "file_bytes": 1000, "page_layout_type":"artical", "meta_info": {"input_datetime": "2020-01-01 00:00:00"}} diff --git a/tests/llm_web_kit/extractor/test_extractor_chain.py b/tests/llm_web_kit/extractor/test_extractor_chain.py index b7d481d3..e1710e0e 100644 --- a/tests/llm_web_kit/extractor/test_extractor_chain.py +++ b/tests/llm_web_kit/extractor/test_extractor_chain.py @@ -63,7 +63,7 @@ def setUp(self): continue self.data_json.append(json.loads(line)) - assert len(self.data_json) == 90 + assert len(self.data_json) == 97 # Config for HTML extraction self.config = load_pipe_tpl('html-test') @@ -763,3 +763,79 @@ def test_csnd_none_formula(self): # print('Markdown Content:', md_content) self.assertIn(r'$\lim\limits_{x \to 1}\dfrac{x^2-1}{x-1}$', md_content) self.assertIn(r'\begin{aligned} \frac{f(1.01)-f(1)}{1.01-1} &= \frac{1.01^2-1^2}{0.01} \\ &= \frac{0.0201}{0.01} \\ &= 2.01\end{aligned}', md_content) + + def test_double_ul(self): + """测试双重ul标签.""" + chain = ExtractSimpleFactory.create(self.config) + self.assertIsNotNone(chain) + test_data = self.data_json[90] + input_data = DataJson(test_data) + result = chain.extract(input_data) + content_md = result.get_content_list().to_nlp_md() + assert 'Wheels and Tyres' in content_md + + def test_other_space(self): + """测试括号、双引单引号等中文符号导致的空格.""" + chain = ExtractSimpleFactory.create(self.config) + self.assertIsNotNone(chain) + test_data = self.data_json[91] + input_data = DataJson(test_data) + result = chain.extract(input_data) + content_md = result.get_content_list().to_nlp_md() + assert '(APC)' in content_md + test_data = self.data_json[92] + input_data = DataJson(test_data) + result = chain.extract(input_data) + content_md = result.get_content_list().to_nlp_md() + assert '“I do swear by the Day of Resurrection.' 
in content_md + + def test_classname_to_code(self): + """测试由于classname导致的audio、list识别为code的情况.""" + chain = ExtractSimpleFactory.create(self.config) + self.assertIsNotNone(chain) + test_data = self.data_json[93] + input_data = DataJson(test_data) + result = chain.extract(input_data) + content_list = result.get_content_list().to_dict()[0] + types = [] + # 1 + for i in range(len(content_list)): + types.append(content_list[i]['type']) + with open('output.jsonl', 'w') as f: + f.write(result.get_content_list().to_json()) + assert 'code' not in types + test_data = self.data_json[94] + input_data = DataJson(test_data) + result = chain.extract(input_data) + content_list = result.get_content_list().to_dict()[0] + types = [] + + for i in range(len(content_list)): + types.append(content_list[i]['type']) + assert 'code' not in types + + def test_sup_escape_error(self): + """测试被转义成