[dpdk-dev] [PATCH v3 1/3] lib/lpm: not inline unnecessary functions

Medvedkin, Vladimir vladimir.medvedkin at intel.com
Fri Jun 28 15:47:14 CEST 2019


Hi all,

On 28/06/2019 05:34, Stephen Hemminger wrote:
> On Fri, 28 Jun 2019 02:44:54 +0000
> "Ruifeng Wang (Arm Technology China)"<Ruifeng.Wang at arm.com>  wrote:
>
>>>    
>>>> Tests showed that the function inlining caused performance drop on
>>>> some x86 platforms with the memory ordering patches applied.
>>>> By force no-inline functions, the performance was better than before
>>>> on x86 and no impact to arm64 platforms.
>>>>
>>>> Suggested-by: Medvedkin Vladimir<vladimir.medvedkin at intel.com>
>>>> Signed-off-by: Ruifeng Wang<ruifeng.wang at arm.com>
>>>> Reviewed-by: Gavin Hu<gavin.hu at arm.com>   
>>>   {
>>>
>>> Do you actually need to force noinline or is just taking of inline enough?
>>> In general, letting compiler decide is often best practice.
>> The force noinline is an optimization for x86 platforms to keep rte_lpm_add() API
>> performance with memory ordering applied.
> I don't think you answered my question. What does a recent version of GCC do
> if you drop the inline.
>
> Actually all the functions in rte_lpm should drop inline.
I'm agree with Stephen. If it is not a fastpath and size of function is 
not minimal it is good to remove inline qualifier for other control 
plane functions such as rule_add/delete/find/etc and let the compiler 
decide to inline it (unless it affects performance).
> diff --git a/lib/librte_lpm/rte_lpm.c b/lib/librte_lpm/rte_lpm.c
> index 6b7b28a2e431..ffe07e980864 100644
> --- a/lib/librte_lpm/rte_lpm.c
> +++ b/lib/librte_lpm/rte_lpm.c
> @@ -399,7 +399,7 @@ MAP_STATIC_SYMBOL(void rte_lpm_free(struct rte_lpm *lpm),
>    * are stored in the rule table from 0 - 31.
>    * NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
>    */
> -static inline int32_t
> +static int32_t
>   rule_add_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
>   	uint8_t next_hop)
>   {
> @@ -471,7 +471,7 @@ rule_add_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
>   	return rule_index;
>   }
>   
> -static inline int32_t
> +static int32_t
>   rule_add_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
>   	uint32_t next_hop)
>   {
> @@ -547,7 +547,7 @@ rule_add_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
>    * Delete a rule from the rule table.
>    * NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
>    */
> -static inline void
> +static void
>   rule_delete_v20(struct rte_lpm_v20 *lpm, int32_t rule_index, uint8_t depth)
>   {
>   	int i;
> @@ -570,7 +570,7 @@ rule_delete_v20(struct rte_lpm_v20 *lpm, int32_t rule_index, uint8_t depth)
>   	lpm->rule_info[depth - 1].used_rules--;
>   }
>   
> -static inline void
> +static void
>   rule_delete_v1604(struct rte_lpm *lpm, int32_t rule_index, uint8_t depth)
>   {
>   	int i;
> @@ -597,7 +597,7 @@ rule_delete_v1604(struct rte_lpm *lpm, int32_t rule_index, uint8_t depth)
>    * Finds a rule in rule table.
>    * NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
>    */
> -static inline int32_t
> +static int32_t
>   rule_find_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth)
>   {
>   	uint32_t rule_gindex, last_rule, rule_index;
> @@ -618,7 +618,7 @@ rule_find_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth)
>   	return -EINVAL;
>   }
>   
> -static inline int32_t
> +static int32_t
>   rule_find_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth)
>   {
>   	uint32_t rule_gindex, last_rule, rule_index;
> @@ -642,7 +642,7 @@ rule_find_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth)
>   /*
>    * Find, clean and allocate a tbl8.
>    */
> -static inline int32_t
> +static int32_t
>   tbl8_alloc_v20(struct rte_lpm_tbl_entry_v20 *tbl8)
>   {
>   	uint32_t group_idx; /* tbl8 group index. */
> @@ -669,7 +669,7 @@ tbl8_alloc_v20(struct rte_lpm_tbl_entry_v20 *tbl8)
>   	return -ENOSPC;
>   }
>   
> -static inline int32_t
> +static int32_t
>   tbl8_alloc_v1604(struct rte_lpm_tbl_entry *tbl8, uint32_t number_tbl8s)
>   {
>   	uint32_t group_idx; /* tbl8 group index. */
> @@ -709,7 +709,7 @@ tbl8_free_v1604(struct rte_lpm_tbl_entry *tbl8, uint32_t tbl8_group_start)
>   	tbl8[tbl8_group_start].valid_group = INVALID;
>   }
>   
> -static inline int32_t
> +static int32_t
>   add_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
>   		uint8_t next_hop)
>   {
> @@ -777,7 +777,7 @@ add_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
>   	return 0;
>   }
>   
> -static inline int32_t
> +static int32_t
>   add_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
>   		uint32_t next_hop)
>   {
> @@ -846,7 +846,7 @@ add_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
>   	return 0;
>   }
>   
> -static inline int32_t
> +static int32_t
>   add_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
>   		uint8_t next_hop)
>   {
> @@ -971,7 +971,7 @@ add_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
>   	return 0;
>   }
>   
> -static inline int32_t
> +static int32_t
>   add_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
>   		uint32_t next_hop)
>   {
> @@ -1244,7 +1244,7 @@ BIND_DEFAULT_SYMBOL(rte_lpm_is_rule_present, _v1604, 16.04);
>   MAP_STATIC_SYMBOL(int rte_lpm_is_rule_present(struct rte_lpm *lpm, uint32_t ip,
>   		uint8_t depth, uint32_t *next_hop), rte_lpm_is_rule_present_v1604);
>   
> -static inline int32_t
> +static int32_t
>   find_previous_rule_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
>   		uint8_t *sub_rule_depth)
>   {
> @@ -1266,7 +1266,7 @@ find_previous_rule_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
>   	return -1;
>   }
>   
> -static inline int32_t
> +static int32_t
>   find_previous_rule_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
>   		uint8_t *sub_rule_depth)
>   {
> @@ -1288,7 +1288,7 @@ find_previous_rule_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
>   	return -1;
>   }
>   
> -static inline int32_t
> +static int32_t
>   delete_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
>   	uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
>   {
> @@ -1381,7 +1381,7 @@ delete_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
>   	return 0;
>   }
>   
> -static inline int32_t
> +static int32_t
>   delete_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip_masked,
>   	uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
>   {
> @@ -1483,7 +1483,7 @@ delete_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip_masked,
>    * Return of value > -1 means tbl8 is in use but has all the same values and
>    * thus can be recycled
>    */
> -static inline int32_t
> +static int32_t
>   tbl8_recycle_check_v20(struct rte_lpm_tbl_entry_v20 *tbl8,
>   		uint32_t tbl8_group_start)
>   {
> @@ -1530,7 +1530,7 @@ tbl8_recycle_check_v20(struct rte_lpm_tbl_entry_v20 *tbl8,
>   	return -EINVAL;
>   }
>   
> -static inline int32_t
> +static int32_t
>   tbl8_recycle_check_v1604(struct rte_lpm_tbl_entry *tbl8,
>   		uint32_t tbl8_group_start)
>   {
> @@ -1577,7 +1577,7 @@ tbl8_recycle_check_v1604(struct rte_lpm_tbl_entry *tbl8,
>   	return -EINVAL;
>   }
>   
> -static inline int32_t
> +static int32_t
>   delete_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
>   	uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
>   {
> @@ -1655,7 +1655,7 @@ delete_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
>   	return 0;
>   }
>   
> -static inline int32_t
> +static int32_t
>   delete_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked,
>   	uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
>   {

-- 
Regards,
Vladimir



More information about the dev mailing list